]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
configure.in (AC_OUTPUT_COMMANDS): Link $THREADS.h and c++threads.h instead of copying.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
041c3194
ZW
23/*
24
25Cleanups to do:-
26
27o Fix ISTABLE to flag the parts we want for IS_HSPACE and IS_NEWLINE.
28o Get use of digraphs in sync with the standard reqd on the command line.
29o -dM and with _cpp_dump_list: too many \n output.
30o Put a printer object in cpp_reader?
31o Check line numbers assigned to all errors.
32o Replace strncmp with memcmp almost everywhere.
33o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
34o Get rid of cpp_get_directive_token.
35o Convert do_ functions to return void. Kaveh thinks it's OK, and said he'll
36 give it a run when we've got some code.
37o _cpp_parse_expr updated to new lexer.
38o Distinguish integers, floats, and 'other' pp-numbers.
39o Store ints and char constants as binary values.
40o New command-line assertion syntax.
41o Merge hash table text pointer and token list text pointer for identifiers.
42o Have _cpp_parse_expr use all the information the new lexer provides.
43o Work towards functions in cpperror.c taking a message level parameter.
44 If we do this, merge the common code of do_warning and do_error.
45o Comment all functions, and describe macro expansion algorithm.
46o Move as much out of header files as possible.
47o Remove single quote pairs `', and some '', from diagnostics.
48o Correct pastability test for CPP_NAME and CPP_NUMBER.
49
50*/
51
45b966db
ZW
52#include "config.h"
53#include "system.h"
54#include "intl.h"
55#include "cpplib.h"
56#include "cpphash.h"
041c3194 57#include "symcat.h"
45b966db 58
041c3194
ZW
59#define auto_expand_name_space(list) \
60 _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
f2d5f0cc
ZW
61static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
62 size_t, FILE *));
041c3194 63static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
1368ee70 64 unsigned int));
041c3194 65static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
9e62c811 66 unsigned int));
f2d5f0cc 67
041c3194
ZW
68static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
69static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
b8f41010
NB
70 unsigned char *));
71static const unsigned char *backslash_start PARAMS ((cpp_reader *,
72 const unsigned char *));
041c3194
ZW
73static int skip_block_comment PARAMS ((cpp_reader *));
74static int skip_line_comment PARAMS ((cpp_reader *));
b8f41010
NB
75static void skip_whitespace PARAMS ((cpp_reader *, int));
76static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
77static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
041c3194
ZW
78static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
79 unsigned int));
b8f41010 80static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
f624ffa7
NB
81static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
82 const unsigned char *,
ad265aa4 83 unsigned int, unsigned int));
041c3194
ZW
84static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
85static int lex_next PARAMS ((cpp_reader *, int));
86static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
87 const cpp_token *));
b8f41010 88
041c3194
ZW
89static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
90static void expand_context_stack PARAMS ((cpp_reader *));
d1d9a6bd 91static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
041c3194
ZW
92 unsigned char *));
93static void output_token PARAMS ((cpp_reader *, const cpp_token *,
94 const cpp_token *));
b8f41010
NB
95typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
96 cpp_token *));
041c3194
ZW
97static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
98 unsigned int));
99static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
100static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
101 const cpp_token *));
102static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
103static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
104 const cpp_token *));
105static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
106 const cpp_token *, int *));
107static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
108static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
109static cpp_token *get_temp_token PARAMS ((cpp_reader *));
110static void release_temp_tokens PARAMS ((cpp_reader *));
111static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
112static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
b8f41010 113
d1d9a6bd
NB
114#define INIT_TOKEN_NAME(list, token) \
115 do {(token)->val.name.len = 0; \
116 (token)->val.name.text = (list)->namebuf + (list)->name_used; \
d1d9a6bd 117 } while (0)
b8f41010 118
041c3194
ZW
119#define VALID_SIGN(c, prevc) \
120 (((c) == '+' || (c) == '-') && \
121 ((prevc) == 'e' || (prevc) == 'E' \
122 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
123
b8f41010
NB
124/* Maybe put these in the ISTABLE eventually. */
125#define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
126#define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
127
/* Finish processing a newline character C ('\n' or '\r') that has just
   been consumed.  Handles LF, CR, CR-LF and LF-CR style newlines: if
   there is a next character before LIMIT and it is the other member of
   the pair ('\r' + '\n' - C), CUR is advanced over it too.  The line
   number of pfile->buffer is then incremented, its line_base is set to
   CUR, and pfile->col_adjust is reset to zero.

   A variable named `pfile' must be in scope where the macro is used.
   CUR must be a modifiable lvalue and is evaluated more than once.  C is
   parenthesized so that any expression may be passed for it.  */

#define handle_newline(cur, limit, c) \
  do { \
    if ((cur) < (limit) && *(cur) == '\r' + '\n' - (c)) \
      (cur)++; \
    pfile->buffer->lineno++; \
    pfile->buffer->line_base = (cur); \
    pfile->col_adjust = 0; \
  } while (0)
b8f41010 139
041c3194 140#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
b8f41010
NB
141#define PREV_TOKEN_TYPE (cur_token[-1].type)
142
f617b8e2
NB
143#define PUSH_TOKEN(ttype) cur_token++->type = ttype
144#define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
145#define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
146#define BACKUP_DIGRAPH(ttype) do { \
147 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
148
149/* An upper bound on the number of bytes needed to spell a token,
150 including preceding whitespace. */
f624ffa7
NB
151#define TOKEN_LEN(token) (5 + (token_spellings[(token)->type].type > \
152 SPELL_NONE ? (token)->val.name.len: 0))
f617b8e2 153
f617b8e2 154#define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
5d7ee2fa
NB
155#define I(e, s) {SPELL_IDENT, s},
156#define S(e, s) {SPELL_STRING, s},
b8f41010
NB
157#define C(e, s) {SPELL_CHAR, s},
158#define N(e, s) {SPELL_NONE, s},
b8f41010 159
041c3194
ZW
160const struct token_spelling
161token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
162
163#undef T
164#undef I
165#undef S
166#undef C
167#undef N
168
169/* For debugging: the internal names of the tokens. */
170#define T(e, s) STRINGX(e),
171#define I(e, s) STRINGX(e),
172#define S(e, s) STRINGX(e),
173#define C(e, s) STRINGX(e),
174#define N(e, s) STRINGX(e),
175
176const char * const token_names[N_TTYPES] = { TTYPE_TABLE };
b8f41010
NB
177
178#undef T
5d7ee2fa
NB
179#undef I
180#undef S
b8f41010
NB
181#undef C
182#undef N
b8f41010 183
041c3194
ZW
184/* The following table is used by trigraph_ok/trigraph_replace. If we
185 have designated initializers, it can be constant data; otherwise,
186 it is set up at runtime by _cpp_init_input_buffer. */
187
/* First variant: the table is a constant array built with designated
   initializers, so init_trigraph_map expands to nothing.  */
188#if (GCC_VERSION >= 2007)
189#define init_trigraph_map() /* nothing */
190#define TRIGRAPH_MAP \
191__extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
192#define END };
193#define s(p, v) [p] = v,
/* Second variant: the table starts out all zero and the s() entries
   become assignments inside the body of init_trigraph_map.  */
194#else
195#define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
196 static void init_trigraph_map PARAMS ((void)) { \
197 unsigned char *x = trigraph_map;
198#define END }
199#define s(p, v) x[p] = v;
200#endif
201
/* Each s (p, v) entry stores V at index P, where P is the third
   character of a trigraph and V is the character it stands for.
   Entries not listed stay zero, which trigraph_replace treats as
   "not a trigraph".  */
202TRIGRAPH_MAP
203 s('=', '#') s(')', ']') s('!', '|')
204 s('(', '[') s('\'', '^') s('>', '}')
205 s('/', '\\') s('<', '{') s('-', '~')
206END
207
/* The helper macros are only needed to build the table.  */
208#undef TRIGRAPH_MAP
209#undef END
210#undef s
211
45b966db
ZW
212/* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
213
214void
215_cpp_grow_token_buffer (pfile, n)
216 cpp_reader *pfile;
217 long n;
218{
219 long old_written = CPP_WRITTEN (pfile);
220 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
221 pfile->token_buffer = (U_CHAR *)
222 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
223 CPP_SET_WRITTEN (pfile, old_written);
224}
225
f2d5f0cc
ZW
226/* Deal with the annoying semantics of fwrite. */
227static void
228safe_fwrite (pfile, buf, len, fp)
229 cpp_reader *pfile;
230 const U_CHAR *buf;
231 size_t len;
232 FILE *fp;
233{
234 size_t count;
45b966db 235
f2d5f0cc
ZW
236 while (len)
237 {
238 count = fwrite (buf, 1, len, fp);
239 if (count == 0)
240 goto error;
241 len -= count;
242 buf += count;
243 }
244 return;
245
246 error:
247 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
248}
249
250/* Notify the compiler proper that the current line number has jumped,
251 or the current file name has changed. */
252
253static void
1368ee70 254output_line_command (pfile, print, line)
45b966db 255 cpp_reader *pfile;
f2d5f0cc 256 cpp_printer *print;
1368ee70 257 unsigned int line;
45b966db 258{
041c3194 259 cpp_buffer *ip = CPP_BUFFER (pfile);
f2d5f0cc
ZW
260 enum { same = 0, enter, leave, rname } change;
261 static const char * const codes[] = { "", " 1", " 2", "" };
262
041c3194
ZW
263 if (line == 0)
264 return;
265
266 /* End the previous line of text. */
267 if (pfile->need_newline)
268 putc ('\n', print->outf);
269 pfile->need_newline = 0;
270
f2d5f0cc
ZW
271 if (CPP_OPTION (pfile, no_line_commands))
272 return;
273
041c3194
ZW
274 /* If ip is null, we've been called from cpp_finish, and they just
275 needed the final flush and trailing newline. */
276 if (!ip)
277 return;
278
fb753f88 279 if (pfile->include_depth == print->last_id)
54bef41d 280 {
fb753f88
JJ
281 /* Determine whether the current filename has changed, and if so,
282 how. 'nominal_fname' values are unique, so they can be compared
283 by comparing pointers. */
284 if (ip->nominal_fname == print->last_fname)
285 change = same;
286 else
0e500c78 287 change = rname;
fb753f88
JJ
288 }
289 else
290 {
291 if (pfile->include_depth > print->last_id)
292 change = enter;
f2d5f0cc 293 else
fb753f88
JJ
294 change = leave;
295 print->last_id = pfile->include_depth;
45b966db 296 }
fb753f88
JJ
297 print->last_fname = ip->nominal_fname;
298
f2d5f0cc
ZW
299 /* If the current file has not changed, we can output a few newlines
300 instead if we want to increase the line number by a small amount.
301 We cannot do this if print->lineno is zero, because that means we
302 haven't output any line commands yet. (The very first line
303 command output is a `same_file' command.) */
041c3194 304 if (change == same && print->lineno > 0
f2d5f0cc 305 && line >= print->lineno && line < print->lineno + 8)
45b966db 306 {
f2d5f0cc 307 while (line > print->lineno)
45b966db 308 {
f2d5f0cc
ZW
309 putc ('\n', print->outf);
310 print->lineno++;
45b966db 311 }
f2d5f0cc 312 return;
45b966db 313 }
f2d5f0cc
ZW
314
315#ifndef NO_IMPLICIT_EXTERN_C
316 if (CPP_OPTION (pfile, cplusplus))
317 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
318 codes[change],
c31a6508
ZW
319 ip->inc->sysp ? " 3" : "",
320 (ip->inc->sysp == 2) ? " 4" : "");
f2d5f0cc
ZW
321 else
322#endif
323 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
324 codes[change],
c31a6508 325 ip->inc->sysp ? " 3" : "");
f2d5f0cc
ZW
326 print->lineno = line;
327}
328
329/* Write the contents of the token_buffer to the output stream, and
330 clear the token_buffer. Also handles generating line commands and
331 keeping track of file transitions. */
332
333void
041c3194 334cpp_output_tokens (pfile, print, line)
f2d5f0cc
ZW
335 cpp_reader *pfile;
336 cpp_printer *print;
041c3194 337 unsigned int line;
f2d5f0cc 338{
f6fab919
ZW
339 if (CPP_WRITTEN (pfile) - print->written)
340 {
f6fab919
ZW
341 safe_fwrite (pfile, pfile->token_buffer,
342 CPP_WRITTEN (pfile) - print->written, print->outf);
041c3194
ZW
343 pfile->need_newline = 1;
344 if (print->lineno)
345 print->lineno++;
45b966db 346
041c3194 347 CPP_SET_WRITTEN (pfile, print->written);
f2d5f0cc 348 }
041c3194 349 output_line_command (pfile, print, line);
45b966db
ZW
350}
351
c56c2073 352/* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
f2d5f0cc
ZW
353
354void
355cpp_scan_buffer_nooutput (pfile)
356 cpp_reader *pfile;
357{
f2d5f0cc 358 unsigned int old_written = CPP_WRITTEN (pfile);
041c3194
ZW
359 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
360
f2d5f0cc
ZW
361 for (;;)
362 {
041c3194
ZW
363 /* In no-output mode, we can ignore everything but directives. */
364 const cpp_token *token = cpp_get_token (pfile);
365 if (token->type == CPP_EOF)
366 {
367 cpp_pop_buffer (pfile);
368 if (CPP_BUFFER (pfile) == stop)
369 break;
370 }
371 _cpp_skip_rest_of_line (pfile);
f2d5f0cc
ZW
372 }
373 CPP_SET_WRITTEN (pfile, old_written);
374}
375
c56c2073 376/* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
f2d5f0cc
ZW
377
378void
379cpp_scan_buffer (pfile, print)
380 cpp_reader *pfile;
381 cpp_printer *print;
382{
c56c2073 383 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
041c3194 384 const cpp_token *token, *prev = 0;
f2d5f0cc
ZW
385
386 for (;;)
387 {
388 token = cpp_get_token (pfile);
041c3194 389 if (token->type == CPP_EOF)
f2d5f0cc 390 {
041c3194
ZW
391 cpp_pop_buffer (pfile);
392 if (CPP_BUFFER (pfile) == stop)
f2d5f0cc 393 return;
041c3194
ZW
394 cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
395 prev = 0;
396 continue;
397 }
398
399 if (token->flags & BOL)
400 {
401 cpp_output_tokens (pfile, print, pfile->token_list.line);
402 prev = 0;
f2d5f0cc 403 }
041c3194
ZW
404
405 output_token (pfile, token, prev);
406 prev = token;
f2d5f0cc
ZW
407 }
408}
409
041c3194
ZW
410/* Helper routine used by parse_include, which can't see spell_token.
411 Reinterpret the current line as an h-char-sequence (< ... >); we are
412 looking at the first token after the <. */
413const cpp_token *
414_cpp_glue_header_name (pfile)
45b966db
ZW
415 cpp_reader *pfile;
416{
041c3194
ZW
417 unsigned int written = CPP_WRITTEN (pfile);
418 const cpp_token *t;
419 cpp_token *hdr;
420 U_CHAR *buf;
421 size_t len;
422
423 for (;;)
424 {
425 t = cpp_get_token (pfile);
426 if (t->type == CPP_GREATER || t->type == CPP_EOF)
427 break;
428
429 CPP_RESERVE (pfile, TOKEN_LEN (t));
430 if (t->flags & PREV_WHITE)
431 CPP_PUTC_Q (pfile, ' ');
432 pfile->limit = spell_token (pfile, t, pfile->limit);
433 }
434
435 if (t->type == CPP_EOF)
436 cpp_error (pfile, "missing terminating > character");
45b966db 437
041c3194
ZW
438 len = CPP_WRITTEN (pfile) - written;
439 buf = xmalloc (len);
440 memcpy (buf, pfile->token_buffer + written, len);
441 CPP_SET_WRITTEN (pfile, written);
442
443 hdr = get_temp_token (pfile);
444 hdr->type = CPP_HEADER_NAME;
445 hdr->flags = 0;
446 hdr->val.name.text = buf;
447 hdr->val.name.len = len;
448 return hdr;
45b966db
ZW
449}
450
1368ee70
ZW
451/* Token-buffer helper functions. */
452
d1d9a6bd
NB
453/* Expand a token list's string space. It is *vital* that
454 list->tokens_used is correct, to get pointer fix-up right. */
041c3194
ZW
455void
456_cpp_expand_name_space (list, len)
1368ee70 457 cpp_toklist *list;
c5a04734
ZW
458 unsigned int len;
459{
f617b8e2 460 const U_CHAR *old_namebuf;
f617b8e2
NB
461
462 old_namebuf = list->namebuf;
c5a04734
ZW
463 list->name_cap += len;
464 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
f617b8e2
NB
465
466 /* Fix up token text pointers. */
79f50f2a 467 if (list->namebuf != old_namebuf)
f617b8e2
NB
468 {
469 unsigned int i;
470
471 for (i = 0; i < list->tokens_used; i++)
472 if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
79f50f2a 473 list->tokens[i].val.name.text += (list->namebuf - old_namebuf);
f617b8e2 474 }
1368ee70
ZW
475}
476
041c3194
ZW
477/* If there is not enough room for LEN more characters, expand the
478 list by just enough to have room for LEN characters. */
479void
480_cpp_reserve_name_space (list, len)
481 cpp_toklist *list;
482 unsigned int len;
483{
484 unsigned int room = list->name_cap - list->name_used;
485
486 if (room < len)
487 _cpp_expand_name_space (list, len - room);
488}
489
1368ee70 490/* Expand the number of tokens in a list. */
d1d9a6bd
NB
491void
492_cpp_expand_token_space (list, count)
1368ee70 493 cpp_toklist *list;
d1d9a6bd 494 unsigned int count;
1368ee70 495{
d1d9a6bd
NB
496 unsigned int n;
497
498 list->tokens_cap += count;
499 n = list->tokens_cap;
15dad1d9 500 if (list->flags & LIST_OFFSET)
d1d9a6bd 501 list->tokens--, n++;
1368ee70 502 list->tokens = (cpp_token *)
d1d9a6bd 503 xrealloc (list->tokens, n * sizeof (cpp_token));
15dad1d9
ZW
504 if (list->flags & LIST_OFFSET)
505 list->tokens++; /* Skip the dummy. */
1368ee70
ZW
506}
507
d1d9a6bd
NB
508/* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
509 an extra token in front of the token list, as this allows the lexer
510 to always peek at the previous token without worrying about
511 underflowing the list, and some initial space. Otherwise, no
512 token- or name-space is allocated, and there is no dummy token. */
15dad1d9 513void
d1d9a6bd 514_cpp_init_toklist (list, flags)
1368ee70 515 cpp_toklist *list;
d1d9a6bd 516 int flags;
1368ee70 517{
d1d9a6bd
NB
518 if (flags == NO_DUMMY_TOKEN)
519 {
520 list->tokens_cap = 0;
041c3194 521 list->tokens = 0;
d1d9a6bd 522 list->name_cap = 0;
041c3194 523 list->namebuf = 0;
d1d9a6bd
NB
524 list->flags = 0;
525 }
526 else
527 {
528 /* Initialize token space. Put a dummy token before the start
529 that will fail matches. */
530 list->tokens_cap = 256; /* 4K's worth. */
531 list->tokens = (cpp_token *)
532 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
533 list->tokens[0].type = CPP_EOF;
534 list->tokens++;
535
536 /* Initialize name space. */
537 list->name_cap = 1024;
041c3194 538 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
d1d9a6bd
NB
539 list->flags = LIST_OFFSET;
540 }
15dad1d9 541
15dad1d9
ZW
542 _cpp_clear_toklist (list);
543}
1368ee70 544
15dad1d9
ZW
545/* Clear a token list. */
546void
547_cpp_clear_toklist (list)
548 cpp_toklist *list;
549{
c5a04734
ZW
550 list->tokens_used = 0;
551 list->name_used = 0;
041c3194
ZW
552 list->directive = 0;
553 list->paramc = 0;
554 list->params_len = 0;
15dad1d9
ZW
555 list->flags &= LIST_OFFSET; /* clear all but that one */
556}
557
558/* Free a token list. Does not free the list itself, which may be
559 embedded in a larger structure. */
560void
561_cpp_free_toklist (list)
041c3194 562 const cpp_toklist *list;
15dad1d9 563{
15dad1d9
ZW
564 if (list->flags & LIST_OFFSET)
565 free (list->tokens - 1); /* Backup over dummy token. */
566 else
567 free (list->tokens);
568 free (list->namebuf);
1368ee70
ZW
569}
570
15dad1d9
ZW
571/* Compare two tokens. */
572int
573_cpp_equiv_tokens (a, b)
574 const cpp_token *a, *b;
575{
041c3194
ZW
576 if (a->type == b->type && a->flags == b->flags)
577 switch (token_spellings[a->type].type)
578 {
579 default: /* Keep compiler happy. */
580 case SPELL_OPERATOR:
581 return 1;
582 case SPELL_CHAR:
583 case SPELL_NONE:
584 return a->val.aux == b->val.aux; /* arg_no or character. */
585 case SPELL_IDENT:
586 case SPELL_STRING:
587 return (a->val.name.len == b->val.name.len
588 && !memcmp (a->val.name.text, b->val.name.text,
589 a->val.name.len));
590 }
15dad1d9 591
041c3194 592 return 0;
15dad1d9
ZW
593}
594
595/* Compare two token lists. */
596int
597_cpp_equiv_toklists (a, b)
598 const cpp_toklist *a, *b;
599{
600 unsigned int i;
601
041c3194
ZW
602 if (a->tokens_used != b->tokens_used
603 || a->flags != b->flags
604 || a->paramc != b->paramc)
15dad1d9
ZW
605 return 0;
606
607 for (i = 0; i < a->tokens_used; i++)
608 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
609 return 0;
610 return 1;
611}
612
041c3194
ZW
613/* Utility routine:
614 Compares, in the manner of strcmp(3), the token beginning at TOKEN
615 and extending for LEN characters to the NUL-terminated string
616 STRING. Typical usage:
9e62c811 617
041c3194
ZW
618 if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
619 "inline"))
620 { ... }
621 */
15dad1d9 622
041c3194
ZW
623int
624cpp_idcmp (token, len, string)
625 const U_CHAR *token;
626 size_t len;
627 const char *string;
628{
629 size_t len2 = strlen (string);
630 int r;
9e62c811 631
041c3194
ZW
632 if ((r = memcmp (token, string, MIN (len, len2))))
633 return r;
1368ee70 634
041c3194
ZW
635 /* The longer of the two strings sorts after the shorter. */
636 if (len == len2)
637 return 0;
638 else if (len < len2)
639 return -1;
640 else
641 return 1;
15dad1d9 642}
1368ee70 643
041c3194 644/* Lexing algorithm.
45b966db 645
041c3194
ZW
646 The original lexer in cpplib was made up of two passes: a first pass
647 that replaced trigraphs and deleted escaped newlines, and a second
648 pass that tokenized the result of the first pass. Tokenisation was
649 performed by peeking at the next character in the input stream. For
650 example, if the input stream contained "!=", the handler for the !
651 character would peek at the next character, and if it were a '='
652 would skip over it, and return a "!=" token, otherwise it would
653 return just the "!" token.
61474454 654
041c3194
ZW
655 To implement a single-pass lexer, this peeking ahead is unworkable.
656 An arbitrary number of escaped newlines, and trigraphs (in particular
657 ??/ which translates to the escape \), could separate the '!' and '='
658 in the input stream, yet the next token is still a "!=".
61474454 659
041c3194
ZW
660 Suppose instead that we lex by one logical line at a time, producing
661 a token list or stack for each logical line, and when seeing the '!'
662 push a CPP_NOT token on the list. Then if the '!' is part of a
663 longer token ("!=") we know we must see the remainder of the token by
664 the time we reach the end of the logical line. Thus we can have the
665 '=' handler look at the previous token (at the end of the list / top
666 of the stack) and see if it is a "!" token, and if so, instead of
667 pushing a "=" token revise the existing token to be a "!=" token.
61474454 668
041c3194
ZW
669 This works in the presence of escaped newlines, because the '\' would
670 have been pushed on the top of the stack as a CPP_BACKSLASH. The
671 newline ('\n' or '\r') handler looks at the token at the top of the
672 stack to see if it is a CPP_BACKSLASH, and if so discards both.
673 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
674 the '=' handler would never see any intervening escaped newlines.
45b966db 675
041c3194
ZW
676 To make trigraphs work in this context, as in precedence trigraphs
677 are highest and converted before anything else, the '?' handler does
678 lookahead to see if it is a trigraph, and if so skips the trigraph
679 and pushes the token it represents onto the top of the stack. This
680 also works in the particular case of a CPP_BACKSLASH trigraph.
45b966db 681
041c3194
ZW
682 To the preprocessor, whitespace is only significant to the point of
683 knowing whether whitespace precedes a particular token. For example,
684 the '=' handler needs to know whether there was whitespace between it
685 and a "!" token on the top of the stack, to make the token conversion
686 decision correctly. So each token has a PREV_WHITE flag to
687 indicate this - the standard permits consecutive whitespace to be
688 regarded as a single space. The compiler front ends are not
689 interested in whitespace at all; they just require a token stream.
690 Another place where whitespace is significant to the preprocessor is
691 a #define statement - if there is whitespace between the macro name
692 and an initial "(" token the macro is "object-like", otherwise it is
693 a function-like macro that takes arguments.
694
695 However, all is not rosy. Parsing of identifiers, numbers, comments
696 and strings becomes trickier because of the possibility of raw
697 trigraphs and escaped newlines in the input stream.
698
699 The trigraphs are three consecutive characters beginning with two
700 question marks. A question mark is not valid as part of a number or
701 identifier, so parsing of a number or identifier terminates normally
702 upon reaching it, returning to the mainloop which handles the
703 trigraph just like it would in any other position. Similarly for the
704 backslash of a backslash-newline combination. So we just need the
705 escaped-newline dropper in the mainloop to check if the token on the
706 top of the stack after dropping the escaped newline is a number or
707 identifier, and if so to continue the processing it as if nothing had
708 happened.
709
710 For strings, we replace trigraphs whenever we reach a quote or
711 newline, because there might be a backslash trigraph escaping them.
712 We need to be careful that we start trigraph replacing from where we
713 left off previously, because it is possible for a first scan to leave
714 "fake" trigraphs that a second scan would pick up as real (e.g. the
715 sequence "????/\n=" would find a fake ??= trigraph after removing the
716 escaped newline.)
717
718 For line comments, on reaching a newline we scan the previous
719 character(s) to see if it escaped, and continue if it is. Block
720 comments ignore everything and just focus on finding the comment
721 termination mark. The only difficult thing, and it is surprisingly
722 tricky, is checking if an asterisk precedes the final slash since
723 they could be separated by escaped newlines. If the preprocessor is
724 invoked with the output comments option, we don't bother removing
725 escaped newlines and replacing trigraphs for output.
726
727 Finally, numbers can begin with a period, which is pushed initially
728 as a CPP_DOT token in its own right. The digit handler checks if the
729 previous token was a CPP_DOT not separated by whitespace, and if so
730 pops it off the stack and pushes a period into the number's buffer
731 before calling the number parser.
45b966db 732
041c3194
ZW
733*/
734
735static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
736 U":>", U"<%", U"%>"};
45b966db 737
041c3194
ZW
738/* Call when a trigraph is encountered. It warns if necessary, and
739 returns true if the trigraph should be honoured. END is the third
740 character of a trigraph in the input stream. */
45b966db 741static int
041c3194 742trigraph_ok (pfile, end)
45b966db 743 cpp_reader *pfile;
041c3194 744 const unsigned char *end;
45b966db 745{
041c3194
ZW
746 int accept = CPP_OPTION (pfile, trigraphs);
747
748 if (CPP_OPTION (pfile, warn_trigraphs))
45b966db 749 {
041c3194
ZW
750 unsigned int col = end - 1 - pfile->buffer->line_base;
751 if (accept)
752 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
753 "trigraph ??%c converted to %c",
754 (int) *end, (int) trigraph_map[*end]);
45b966db 755 else
041c3194
ZW
756 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
757 "trigraph ??%c ignored", (int) *end);
45b966db 758 }
041c3194 759 return accept;
45b966db
ZW
760}
761
041c3194
ZW
762/* Scan a string for trigraphs, warning or replacing them inline as
763 appropriate. When parsing a string, we must call this routine
764 before processing a newline character (if trigraphs are enabled),
765 since the newline might be escaped by a preceding backslash
766 trigraph sequence. Returns a pointer to the end of the name after
767 replacement. */
768
769static unsigned char *
770trigraph_replace (pfile, src, limit)
45b966db 771 cpp_reader *pfile;
041c3194
ZW
772 unsigned char *src;
773 unsigned char *limit;
45b966db 774{
041c3194
ZW
775 unsigned char *dest;
776
777 /* Starting with src[1], find two consecutive '?'. The case of no
778 trigraphs is streamlined. */
779
043afb2a 780 for (src++; src + 1 < limit; src += 2)
041c3194
ZW
781 {
782 if (src[0] != '?')
783 continue;
784
785 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
786 if (src[-1] == '?')
787 src--;
788 else if (src + 2 == limit || src[1] != '?')
789 continue;
45b966db 790
041c3194
ZW
791 /* Check if it really is a trigraph. */
792 if (trigraph_map[src[2]] == 0)
793 continue;
45b966db 794
041c3194
ZW
795 dest = src;
796 goto trigraph_found;
797 }
798 return limit;
45b966db 799
041c3194
ZW
800 /* Now we have a trigraph, we need to scan the remaining buffer, and
801 copy-shifting its contents left if replacement is enabled. */
802 for (; src + 2 < limit; dest++, src++)
803 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
804 {
805 trigraph_found:
806 src += 2;
807 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
808 *dest = trigraph_map[*src];
809 }
810
811 /* Copy remaining (at most 2) characters. */
812 while (src < limit)
813 *dest++ = *src++;
814 return dest;
45b966db
ZW
815}
816
041c3194
ZW
817/* If CUR is a backslash or the end of a trigraphed backslash, return
818 a pointer to its beginning, otherwise NULL. We don't read beyond
819 the buffer start, because there is the start of the comment in the
820 buffer. */
821static const unsigned char *
822backslash_start (pfile, cur)
64aaf407 823 cpp_reader *pfile;
041c3194 824 const unsigned char *cur;
64aaf407 825{
041c3194
ZW
826 if (cur[0] == '\\')
827 return cur;
828 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
829 && trigraph_ok (pfile, cur))
830 return cur - 2;
831 return 0;
64aaf407
NB
832}
833
041c3194
ZW
834/* Skip a C-style block comment, scanning from PFILE->buffer->cur up
835 to PFILE->buffer->rlimit. This is probably the trickiest handler.
836 We find the end of the comment by seeing if an asterisk is before
837 every '/' we encounter. The nasty complication is that a previous
838 asterisk may be separated by one or more escaped newlines. On return
 buffer->cur is just past the terminating '/', or at rlimit if none was
 found. Returns non-zero if comment terminated by EOF, zero otherwise. */
839static int
840skip_block_comment (pfile)
45b966db
ZW
841 cpp_reader *pfile;
842{
041c3194
ZW
843 cpp_buffer *buffer = pfile->buffer;
 /* When non-null, the position right after an escaped newline that
 followed a '*' (directly, or through further escaped newlines). A
 '/' found at exactly this position terminates the comment. */
844 const unsigned char *char_after_star = 0;
845 register const unsigned char *cur = buffer->cur;
846 int seen_eof = 0;
847
848 /* Inner loop would think the comment has ended if the first comment
849 character is a '/'. Avoid this and keep the inner loop clean by
850 skipping such a character. */
851 if (cur < buffer->rlimit && cur[0] == '/')
852 cur++;
64aaf407 853
041c3194 854 for (; cur < buffer->rlimit; )
45b966db 855 {
041c3194
ZW
856 unsigned char c = *cur++;
857
858 /* People like decorating comments with '*', so check for
859 '/' instead for efficiency. */
860 if (c == '/')
45b966db 861 {
041c3194
ZW
 /* cur[-1] is the '/' just read, so cur[-2] is the character
 in front of it. */
862 if (cur[-2] == '*' || cur - 1 == char_after_star)
863 goto out;
864
865 /* Warn about potential nested comments, but not when
866 the final character inside the comment is a '/'.
867 Don't bother to get it right across escaped newlines. */
868 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
869 && cur[0] == '*' && cur[1] != '/')
45b966db 870 {
041c3194
ZW
 /* NOTE(review): buffer->cur is updated before warning,
 presumably so the diagnostic is located at the current
 position - confirm against cpp_warning. */
871 buffer->cur = cur;
872 cpp_warning (pfile, "'/*' within comment");
45b966db 873 }
45b966db 874 }
041c3194 875 else if (IS_NEWLINE(c))
45b966db 876 {
041c3194
ZW
 /* cur - 2 is the character before the newline just read.
 This must be looked at before handle_newline, which may
 advance cur over the second character of a two-character
 newline. */
877 const unsigned char* bslash = backslash_start (pfile, cur - 2);
878
879 handle_newline (cur, buffer->rlimit, c);
880 /* Work correctly if there is an asterisk before an
881 arbitrarily long sequence of escaped newlines. */
882 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
883 char_after_star = cur;
884 else
885 char_after_star = 0;
45b966db 886 }
45b966db 887 }
041c3194
ZW
 /* The loop ran to rlimit without finding the terminator. */
888 seen_eof = 1;
889
64aaf407 890 out:
041c3194
ZW
891 buffer->cur = cur;
892 return seen_eof;
45b966db
ZW
893}
894
041c3194
ZW
895/* Skip a C++ or Chill line comment. Handles escaped newlines.
896 Returns non-zero if a multiline comment. */
897static int
898skip_line_comment (pfile)
45b966db
ZW
899 cpp_reader *pfile;
900{
041c3194
ZW
901 cpp_buffer *buffer = pfile->buffer;
902 register const unsigned char *cur = buffer->cur;
903 int multiline = 0;
904
905 for (; cur < buffer->rlimit; )
906 {
907 unsigned char c = *cur++;
908
909 if (IS_NEWLINE (c))
45b966db 910 {
041c3194
ZW
911 /* Check for a (trigaph?) backslash escaping the newline. */
912 if (!backslash_start (pfile, cur - 2))
913 goto out;
914 multiline = 1;
915 handle_newline (cur, buffer->rlimit, c);
916 }
917 }
918 cur++;
45b966db 919
041c3194
ZW
920 out:
921 buffer->cur = cur - 1; /* Leave newline for caller. */
922 return multiline;
923}
45b966db 924
041c3194
ZW
925/* Skips whitespace, stopping at next non-whitespace character.
926 Adjusts pfile->col_adjust to account for tabs. This enables tokens
927 to be assigned the correct column. */
928static void
929skip_whitespace (pfile, in_directive)
930 cpp_reader *pfile;
931 int in_directive;
932{
933 cpp_buffer *buffer = pfile->buffer;
934 register const unsigned char *cur = buffer->cur;
935 unsigned short null_count = 0;
45b966db 936
041c3194
ZW
937 for (; cur < buffer->rlimit; )
938 {
939 unsigned char c = *cur++;
45b966db 940
041c3194
ZW
941 if (c == '\t')
942 {
943 unsigned int col = CPP_BUF_COLUMN (buffer, cur - 1);
944 pfile->col_adjust += (CPP_OPTION (pfile, tabstop) - 1
945 - col % CPP_OPTION(pfile, tabstop));
45b966db 946 }
041c3194
ZW
947 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
948 continue;
949 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
950 goto out;
951 if (c == '\0')
952 null_count++;
953 /* Mut be '\f' or '\v' */
954 else if (in_directive && CPP_PEDANTIC (pfile))
955 cpp_pedwarn (pfile, "%s in preprocessing directive",
956 c == '\f' ? "formfeed" : "vertical tab");
45b966db 957 }
041c3194 958 cur++;
45b966db 959
041c3194
ZW
960 out:
961 buffer->cur = cur - 1;
962 if (null_count)
963 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
964 : "embedded null character ignored");
965}
45b966db 966
041c3194
ZW
967/* Parse (append) an identifier. */
968static void
969parse_name (pfile, list, name)
45b966db 970 cpp_reader *pfile;
041c3194
ZW
971 cpp_toklist *list;
972 cpp_name *name;
45b966db 973{
041c3194
ZW
974 const unsigned char *name_limit;
975 unsigned char *namebuf;
976 cpp_buffer *buffer = pfile->buffer;
977 register const unsigned char *cur = buffer->cur;
978
979 expanded:
980 name_limit = list->namebuf + list->name_cap;
981 namebuf = list->namebuf + list->name_used;
982
983 for (; cur < buffer->rlimit && namebuf < name_limit; )
984 {
985 unsigned char c = *namebuf = *cur; /* Copy a single char. */
45b966db 986
041c3194
ZW
987 if (! is_idchar(c))
988 goto out;
989 namebuf++;
990 cur++;
e5ec2402
ZW
991 /* $ is not a legal identifier character in the standard, but is
992 commonly accepted as an extension. Don't warn about it in
993 skipped conditional blocks. */
994 if (c == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
041c3194
ZW
995 {
996 buffer->cur = cur;
997 cpp_pedwarn (pfile, "'$' character in identifier");
998 }
999 }
45b966db 1000
041c3194
ZW
1001 /* Run out of name space? */
1002 if (cur < buffer->rlimit)
1003 {
1004 list->name_used = namebuf - list->namebuf;
1005 auto_expand_name_space (list);
1006 goto expanded;
1007 }
1008
1009 out:
1010 buffer->cur = cur;
1011 name->len = namebuf - name->text;
1012 list->name_used = namebuf - list->namebuf;
45b966db
ZW
1013}
1014
041c3194 1015/* Parse (append) a number. */
45b966db 1016static void
041c3194 1017parse_number (pfile, list, name)
45b966db 1018 cpp_reader *pfile;
041c3194
ZW
1019 cpp_toklist *list;
1020 cpp_name *name;
45b966db 1021{
041c3194
ZW
1022 const unsigned char *name_limit;
1023 unsigned char *namebuf;
1024 cpp_buffer *buffer = pfile->buffer;
1025 register const unsigned char *cur = buffer->cur;
45b966db 1026
041c3194
ZW
1027 expanded:
1028 name_limit = list->namebuf + list->name_cap;
1029 namebuf = list->namebuf + list->name_used;
45b966db 1030
041c3194
ZW
1031 for (; cur < buffer->rlimit && namebuf < name_limit; )
1032 {
1033 unsigned char c = *namebuf = *cur; /* Copy a single char. */
45b966db 1034
041c3194
ZW
1035 /* Perhaps we should accept '$' here if we accept it for
1036 identifiers. We know namebuf[-1] is safe, because for c to
1037 be a sign we must have pushed at least one character. */
1038 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1039 goto out;
45b966db 1040
041c3194
ZW
1041 namebuf++;
1042 cur++;
45b966db 1043 }
64aaf407 1044
041c3194
ZW
1045 /* Run out of name space? */
1046 if (cur < buffer->rlimit)
1047 {
1048 list->name_used = namebuf - list->namebuf;
1049 auto_expand_name_space (list);
1050 goto expanded;
1051 }
1052
64aaf407 1053 out:
041c3194
ZW
1054 buffer->cur = cur;
1055 name->len = namebuf - name->text;
1056 list->name_used = namebuf - list->namebuf;
45b966db
ZW
1057}
1058
041c3194
ZW
1059/* Places a string terminated by an unescaped TERMINATOR into a
1060 cpp_name, which should be expandable and thus at the top of the
1061 list's stack. Handles embedded trigraphs, if necessary, and
1062 escaped newlines.
45b966db 1063
041c3194
ZW
1064 Can be used for character constants (terminator = '\''), string
1065 constants ('"') and angled headers ('>'). Multi-line strings are
1066 allowed, except for within directives. */
45b966db 1067
041c3194
ZW
1068static void
1069parse_string (pfile, list, token, terminator)
45b966db 1070 cpp_reader *pfile;
041c3194
ZW
1071 cpp_toklist *list;
1072 cpp_token *token;
1073 unsigned int terminator;
45b966db 1074{
041c3194
ZW
1075 cpp_buffer *buffer = pfile->buffer;
1076 cpp_name *name = &token->val.name;
1077 register const unsigned char *cur = buffer->cur;
1078 const unsigned char *name_limit;
1079 unsigned char *namebuf;
1080 unsigned int null_count = 0;
1081 unsigned int trigraphed = list->name_used;
45b966db 1082
041c3194
ZW
1083 expanded:
1084 name_limit = list->namebuf + list->name_cap;
1085 namebuf = list->namebuf + list->name_used;
f2d5f0cc 1086
041c3194 1087 for (; cur < buffer->rlimit && namebuf < name_limit; )
45b966db 1088 {
041c3194 1089 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
45b966db 1090
041c3194
ZW
1091 if (c == '\0')
1092 null_count++;
1093 else if (c == terminator || IS_NEWLINE (c))
45b966db 1094 {
041c3194
ZW
1095 /* Needed for trigraph_replace and multiline string warning. */
1096 buffer->cur = cur;
5eec0563 1097
041c3194
ZW
1098 /* Scan for trigraphs before checking if backslash-escaped. */
1099 if ((CPP_OPTION (pfile, trigraphs)
1100 || CPP_OPTION (pfile, warn_trigraphs))
1101 && namebuf - (list->namebuf + trigraphed) >= 3)
45b966db 1102 {
041c3194
ZW
1103 namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1104 namebuf);
1105 /* The test above guarantees trigraphed will be positive. */
1106 trigraphed = namebuf - list->namebuf - 2;
45b966db 1107 }
45b966db 1108
041c3194
ZW
1109 namebuf--; /* Drop the newline / terminator from the name. */
1110 if (IS_NEWLINE (c))
45b966db 1111 {
041c3194
ZW
1112 /* Drop a backslash newline, and continue. */
1113 if (namebuf[-1] == '\\')
45b966db 1114 {
041c3194
ZW
1115 handle_newline (cur, buffer->rlimit, c);
1116 namebuf--;
1117 continue;
45b966db 1118 }
45b966db 1119
041c3194 1120 cur--;
45b966db 1121
041c3194
ZW
1122 /* In Fortran and assembly language, silently terminate
1123 strings of either variety at end of line. This is a
1124 kludge around not knowing where comments are in these
1125 languages. */
1126 if (CPP_OPTION (pfile, lang_fortran)
1127 || CPP_OPTION (pfile, lang_asm))
1128 goto out;
64aaf407 1129
041c3194
ZW
1130 /* Character constants, headers and asserts may not
1131 extend over multiple lines. In Standard C, neither
1132 may strings. We accept multiline strings as an
1133 extension. (Even in directives - otherwise, glibc's
1134 longlong.h breaks.) */
1135 if (terminator != '"')
1136 goto unterminated;
1137
1138 cur++; /* Move forwards again. */
45b966db 1139
041c3194
ZW
1140 if (pfile->multiline_string_line == 0)
1141 {
1142 pfile->multiline_string_line = token->line;
1143 pfile->multiline_string_column = token->col;
1144 if (CPP_PEDANTIC (pfile))
1145 cpp_pedwarn (pfile, "multi-line string constant");
1146 }
1147
1148 *namebuf++ = '\n';
1149 handle_newline (cur, buffer->rlimit, c);
45b966db
ZW
1150 }
1151 else
1152 {
041c3194 1153 unsigned char *temp;
45b966db 1154
041c3194
ZW
1155 /* An odd number of consecutive backslashes represents
1156 an escaped terminator. */
1157 temp = namebuf - 1;
1158 while (temp >= name->text && *temp == '\\')
1159 temp--;
45b966db 1160
041c3194
ZW
1161 if ((namebuf - temp) & 1)
1162 goto out;
1163 namebuf++;
1164 }
45b966db 1165 }
ff2b53ef 1166 }
45b966db 1167
041c3194
ZW
1168 /* Run out of name space? */
1169 if (cur < buffer->rlimit)
45b966db 1170 {
041c3194
ZW
1171 list->name_used = namebuf - list->namebuf;
1172 auto_expand_name_space (list);
1173 goto expanded;
1174 }
45b966db 1175
041c3194
ZW
1176 /* We may not have trigraph-replaced the input for this code path,
1177 but as the input is in error by being unterminated we don't
1178 bother. Prevent warnings about no newlines at EOF. */
1179 if (IS_NEWLINE(cur[-1]))
1180 cur--;
45b966db 1181
041c3194
ZW
1182 unterminated:
1183 cpp_error (pfile, "missing terminating %c character", (int) terminator);
476f2869 1184
041c3194
ZW
1185 if (terminator == '\"' && pfile->multiline_string_line != list->line
1186 && pfile->multiline_string_line != 0)
1187 {
1188 cpp_error_with_line (pfile, pfile->multiline_string_line,
1189 pfile->multiline_string_column,
1190 "possible start of unterminated string literal");
1191 pfile->multiline_string_line = 0;
45b966db 1192 }
041c3194
ZW
1193
1194 out:
1195 buffer->cur = cur;
1196 name->len = namebuf - name->text;
1197 list->name_used = namebuf - list->namebuf;
45b966db 1198
041c3194
ZW
1199 if (null_count > 0)
1200 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1201 : "null character preserved"));
45b966db
ZW
1202}
1203
041c3194
ZW
1204/* The character TYPE helps us distinguish comment types: '*' = C
1205 style, '-' = Chill-style and '/' = C++ style. For code simplicity,
1206 the stored comment includes the comment start and any terminator. */
1207
1208#define COMMENT_START_LEN 2
9e62c811 1209static void
041c3194
ZW
1210save_comment (list, token, from, len, type)
1211 cpp_toklist *list;
1212 cpp_token *token;
1213 const unsigned char *from;
9e62c811 1214 unsigned int len;
041c3194 1215 unsigned int type;
9e62c811 1216{
041c3194
ZW
1217 unsigned char *buffer;
1218
1219 len += COMMENT_START_LEN;
9e62c811 1220
041c3194
ZW
1221 if (list->name_used + len > list->name_cap)
1222 _cpp_expand_name_space (list, len);
9e62c811 1223
041c3194
ZW
1224 INIT_TOKEN_NAME (list, token);
1225 token->type = CPP_COMMENT;
1226 token->val.name.len = len;
45b966db 1227
041c3194
ZW
1228 buffer = list->namebuf + list->name_used;
1229 list->name_used += len;
45b966db 1230
041c3194
ZW
1231 /* Copy the comment. */
1232 if (type == '*')
45b966db 1233 {
041c3194
ZW
1234 *buffer++ = '/';
1235 *buffer++ = '*';
45b966db 1236 }
041c3194 1237 else
ea4a453b 1238 {
041c3194
ZW
1239 *buffer++ = type;
1240 *buffer++ = type;
ea4a453b 1241 }
041c3194 1242 memcpy (buffer, from, len - COMMENT_START_LEN);
45b966db
ZW
1243}
1244
041c3194
ZW
1245/*
1246 * The tokenizer's main loop. Returns a token list, representing a
1247 * logical line in the input file. On EOF after some tokens have
1248 * been processed, we return immediately. Then in next call, or if
1249 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1250 * token is placed in the list.
1251 *
1252 * Implementation relies almost entirely on lookback, rather than
1253 * looking forwards. This means that tokenization requires just
1254 * a single pass of the file, even in the presence of trigraphs and
1255 * escaped newlines, providing significant performance benefits.
1256 * Trigraph overhead is negligible if they are disabled, and low
1257 * even when enabled.
1258 */
1259
1260#define IS_DIRECTIVE() (list->directive != 0)
1261#define MIGHT_BE_DIRECTIVE() \
1262(cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
45b966db 1263
041c3194
ZW
1264static void
1265lex_line (pfile, list)
45b966db 1266 cpp_reader *pfile;
041c3194 1267 cpp_toklist *list;
45b966db 1268{
041c3194
ZW
1269 cpp_token *cur_token, *token_limit, *first;
1270 cpp_buffer *buffer = pfile->buffer;
1271 const unsigned char *cur = buffer->cur;
1272 unsigned char flags = 0;
1273 unsigned int first_token = list->tokens_used;
45b966db 1274
041c3194
ZW
1275 if (!(list->flags & LIST_OFFSET))
1276 (abort) ();
1277
1278 list->file = buffer->nominal_fname;
1279 list->line = CPP_BUF_LINE (buffer);
1280 pfile->col_adjust = 0;
1281 pfile->in_lex_line = 1;
1282 if (cur == buffer->buf)
1283 list->flags |= BEG_OF_FILE;
1284
1285 expanded:
1286 token_limit = list->tokens + list->tokens_cap;
1287 cur_token = list->tokens + list->tokens_used;
45b966db 1288
041c3194 1289 for (; cur < buffer->rlimit && cur_token < token_limit;)
45b966db 1290 {
041c3194 1291 unsigned char c;
45b966db 1292
041c3194
ZW
1293 /* Optimize whitespace skipping, as most tokens are probably
1294 separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
1295 c = *cur++;
1296 if (is_hspace (c))
45b966db 1297 {
041c3194
ZW
1298 /* Step back to get the null warning and tab correction. */
1299 buffer->cur = cur - 1;
1300 skip_whitespace (pfile, IS_DIRECTIVE ());
1301 cur = buffer->cur;
ff2b53ef 1302
041c3194
ZW
1303 flags = PREV_WHITE;
1304 if (cur == buffer->rlimit)
1305 break;
1306 c = *cur++;
45b966db 1307 }
45b966db 1308
041c3194
ZW
1309 /* Initialize current token. CPP_EOF will not be fixed up by
1310 expand_name_space. */
1311 list->tokens_used = cur_token - list->tokens + 1;
1312 cur_token->type = CPP_EOF;
1313 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1314 cur_token->line = CPP_BUF_LINE (buffer);
1315 cur_token->flags = flags;
1316 flags = 0;
46d07497 1317
041c3194
ZW
1318 switch (c)
1319 {
1320 case '0': case '1': case '2': case '3': case '4':
1321 case '5': case '6': case '7': case '8': case '9':
1322 {
1323 int prev_dot;
46d07497 1324
041c3194
ZW
1325 cur--; /* Backup character. */
1326 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1327 if (prev_dot)
1328 cur_token--;
1329 INIT_TOKEN_NAME (list, cur_token);
1330 /* Prepend an immediately previous CPP_DOT token. */
1331 if (prev_dot)
1332 {
1333 if (list->name_cap == list->name_used)
1334 auto_expand_name_space (list);
46d07497 1335
041c3194
ZW
1336 cur_token->val.name.len = 1;
1337 list->namebuf[list->name_used++] = '.';
1338 }
46d07497 1339
041c3194
ZW
1340 continue_number:
1341 cur_token->type = CPP_NUMBER; /* Before parse_number. */
1342 buffer->cur = cur;
1343 parse_number (pfile, list, &cur_token->val.name);
1344 cur = buffer->cur;
1345 }
1346 /* Check for # 123 form of #line. */
1347 if (MIGHT_BE_DIRECTIVE ())
1348 list->directive = _cpp_check_linemarker (pfile, cur_token,
1349 !(cur_token[-1].flags
1350 & PREV_WHITE));
1351 cur_token++;
1352 break;
46d07497 1353
041c3194
ZW
1354 letter:
1355 case '_':
1356 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1357 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1358 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1359 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1360 case 'y': case 'z':
1361 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1362 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1363 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1364 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1365 case 'Y': case 'Z':
1366 cur--; /* Backup character. */
1367 INIT_TOKEN_NAME (list, cur_token);
1368 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
1369
1370 continue_name:
1371 buffer->cur = cur;
1372 parse_name (pfile, list, &cur_token->val.name);
1373 cur = buffer->cur;
45b966db 1374
041c3194
ZW
1375 if (MIGHT_BE_DIRECTIVE ())
1376 list->directive = _cpp_check_directive (pfile, cur_token,
1377 !(list->tokens[0].flags
1378 & PREV_WHITE));
1379 cur_token++;
1380 break;
f8f769ea 1381
041c3194
ZW
1382 case '\'':
1383 /* Character constants are not recognized when processing Fortran,
1384 or if -traditional. */
1385 if (CPP_OPTION (pfile, lang_fortran) || CPP_TRADITIONAL (pfile))
1386 goto other;
45b966db 1387
041c3194
ZW
1388 /* Fall through. */
1389 case '\"':
1390 /* Traditionally, escaped strings are not strings. */
1391 if (CPP_TRADITIONAL (pfile) && IMMED_TOKEN ()
1392 && PREV_TOKEN_TYPE == CPP_BACKSLASH)
1393 goto other;
04e3ec78 1394
041c3194
ZW
1395 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1396 /* Do we have a wide string? */
1397 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1398 && cur_token[-1].val.name.len == 1
1399 && cur_token[-1].val.name.text[0] == 'L'
1400 && !CPP_TRADITIONAL (pfile))
04e3ec78 1401 {
041c3194
ZW
1402 /* No need for 'L' any more. */
1403 list->name_used--;
1404 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
04e3ec78 1405 }
45b966db 1406
041c3194
ZW
1407 do_parse_string:
1408 /* Here c is one of ' " or >. */
1409 INIT_TOKEN_NAME (list, cur_token);
1410 buffer->cur = cur;
1411 parse_string (pfile, list, cur_token, c);
1412 cur = buffer->cur;
1413 cur_token++;
1414 break;
45b966db 1415
041c3194
ZW
1416 case '/':
1417 cur_token->type = CPP_DIV;
1418 if (IMMED_TOKEN ())
1419 {
1420 if (PREV_TOKEN_TYPE == CPP_DIV)
1421 {
1422 /* We silently allow C++ comments in system headers,
1423 irrespective of conformance mode, because lots of
1424 broken systems do that and trying to clean it up
1425 in fixincludes is a nightmare. */
1426 if (CPP_IN_SYSTEM_HEADER (pfile))
1427 goto do_line_comment;
1428 else if (CPP_OPTION (pfile, cplusplus_comments))
1429 {
1430 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1431 && ! buffer->warned_cplusplus_comments)
1432 {
1433 buffer->cur = cur;
1434 cpp_pedwarn (pfile,
1435 "C++ style comments are not allowed in ISO C89");
1436 cpp_pedwarn (pfile,
1437 "(this will be reported only once per input file)");
1438 buffer->warned_cplusplus_comments = 1;
1439 }
1440 do_line_comment:
1441 buffer->cur = cur;
1442#if 0 /* Leave until new lexer in place. */
1443 if (cur[-2] != c)
1444 cpp_warning (pfile,
1445 "comment start split across lines");
1446#endif
1447 if (skip_line_comment (pfile))
1448 cpp_warning (pfile, "multi-line comment");
f8f769ea 1449
041c3194
ZW
1450 /* Back-up to first '-' or '/'. */
1451 cur_token--;
1452 if (!CPP_OPTION (pfile, discard_comments)
1453 && (!IS_DIRECTIVE()
1454 || (list->directive->flags & COMMENTS)))
1455 save_comment (list, cur_token++, cur,
1456 buffer->cur - cur, c);
1457 else if (!CPP_OPTION (pfile, traditional))
1458 flags = PREV_WHITE;
1459
1460 cur = buffer->cur;
1461 break;
1462 }
1463 }
1464 }
1465 cur_token++;
1466 break;
1467
1468 case '*':
1469 cur_token->type = CPP_MULT;
1470 if (IMMED_TOKEN ())
45b966db 1471 {
041c3194
ZW
1472 if (PREV_TOKEN_TYPE == CPP_DIV)
1473 {
1474 buffer->cur = cur;
1475#if 0 /* Leave until new lexer in place. */
1476 if (cur[-2] != '/')
1477 cpp_warning (pfile,
1478 "comment start '/*' split across lines");
1479#endif
1480 if (skip_block_comment (pfile))
1481 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1482 "unterminated comment");
1483#if 0 /* Leave until new lexer in place. */
1484 else if (buffer->cur[-2] != '*')
1485 cpp_warning (pfile,
1486 "comment end '*/' split across lines");
1487#endif
1488 /* Back up to opening '/'. */
1489 cur_token--;
1490 if (!CPP_OPTION (pfile, discard_comments)
1491 && (!IS_DIRECTIVE()
1492 || (list->directive->flags & COMMENTS)))
1493 save_comment (list, cur_token++, cur,
1494 buffer->cur - cur, c);
1495 else if (!CPP_OPTION (pfile, traditional))
1496 flags = PREV_WHITE;
1497
1498 cur = buffer->cur;
1499 break;
1500 }
1501 else if (CPP_OPTION (pfile, cplusplus))
1502 {
1503 /* In C++, there are .* and ->* operators. */
1504 if (PREV_TOKEN_TYPE == CPP_DEREF)
1505 BACKUP_TOKEN (CPP_DEREF_STAR);
1506 else if (PREV_TOKEN_TYPE == CPP_DOT)
1507 BACKUP_TOKEN (CPP_DOT_STAR);
1508 }
45b966db 1509 }
041c3194 1510 cur_token++;
f8f769ea 1511 break;
45b966db 1512
041c3194
ZW
1513 case '\n':
1514 case '\r':
1515 handle_newline (cur, buffer->rlimit, c);
1516 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
45b966db 1517 {
041c3194 1518 if (IMMED_TOKEN ())
45b966db 1519 {
041c3194
ZW
1520 /* Remove the escaped newline. Then continue to process
1521 any interrupted name or number. */
1522 cur_token--;
1523 /* Backslash-newline may not be immediately followed by
1524 EOF (C99 5.1.1.2). */
1525 if (cur >= buffer->rlimit)
1526 {
1527 cpp_pedwarn (pfile, "backslash-newline at end of file");
1528 break;
1529 }
1530 if (IMMED_TOKEN ())
1531 {
1532 cur_token--;
1533 if (cur_token->type == CPP_NAME)
1534 goto continue_name;
1535 else if (cur_token->type == CPP_NUMBER)
1536 goto continue_number;
1537 cur_token++;
1538 }
1539 /* Remember whitespace setting. */
1540 flags = cur_token->flags;
f8f769ea 1541 break;
45b966db 1542 }
041c3194
ZW
1543 else
1544 {
1545 buffer->cur = cur;
1546 cpp_warning (pfile,
1547 "backslash and newline separated by space");
1548 }
1549 }
1550 else if (MIGHT_BE_DIRECTIVE ())
1551 {
1552 /* "Null directive." C99 6.10.7: A preprocessing
1553 directive of the form # <new-line> has no effect.
1554
1555 But it is still a directive, and therefore disappears
1556 from the output. */
1557 cur_token--;
1558 if (cur_token->flags & PREV_WHITE)
45b966db 1559 {
041c3194
ZW
1560 if (CPP_WTRADITIONAL (pfile))
1561 cpp_warning (pfile,
1562 "K+R C ignores #\\n with the # indented");
1563 if (CPP_TRADITIONAL (pfile))
1564 cur_token++;
45b966db 1565 }
f8f769ea 1566 }
45b966db 1567
041c3194
ZW
1568 /* Skip vertical space until we have at least one token to
1569 return. */
1570 if (cur_token != &list->tokens[first_token])
1571 goto out;
1572 list->line = CPP_BUF_LINE (buffer);
f8f769ea 1573 break;
04e3ec78 1574
041c3194
ZW
1575 case '-':
1576 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1577 {
1578 if (CPP_OPTION (pfile, chill))
1579 goto do_line_comment;
1580 REVISE_TOKEN (CPP_MINUS_MINUS);
1581 }
1582 else
1583 PUSH_TOKEN (CPP_MINUS);
1584 break;
45b966db 1585
041c3194
ZW
1586 make_hash:
1587 case '#':
1588 /* The digraph flag checking ensures that ## and %:%:
1589 are interpreted as CPP_PASTE, but #%: and %:# are not. */
1590 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1591 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1592 REVISE_TOKEN (CPP_PASTE);
1593 else
1594 PUSH_TOKEN (CPP_HASH);
1595 break;
04e3ec78 1596
041c3194
ZW
1597 case ':':
1598 cur_token->type = CPP_COLON;
1599 if (IMMED_TOKEN ())
1600 {
1601 if (PREV_TOKEN_TYPE == CPP_COLON
1602 && CPP_OPTION (pfile, cplusplus))
1603 BACKUP_TOKEN (CPP_SCOPE);
1604 /* Digraph: "<:" is a '[' */
1605 else if (PREV_TOKEN_TYPE == CPP_LESS)
1606 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1607 /* Digraph: "%:" is a '#' */
1608 else if (PREV_TOKEN_TYPE == CPP_MOD)
1609 {
1610 (--cur_token)->flags |= DIGRAPH;
1611 goto make_hash;
1612 }
1613 }
1614 cur_token++;
1615 break;
f8f769ea 1616
041c3194
ZW
1617 case '&':
1618 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1619 REVISE_TOKEN (CPP_AND_AND);
1620 else
1621 PUSH_TOKEN (CPP_AND);
f8f769ea 1622 break;
45b966db 1623
041c3194
ZW
1624 make_or:
1625 case '|':
1626 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1627 REVISE_TOKEN (CPP_OR_OR);
1628 else
1629 PUSH_TOKEN (CPP_OR);
1630 break;
45b966db 1631
041c3194
ZW
1632 case '+':
1633 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1634 REVISE_TOKEN (CPP_PLUS_PLUS);
1635 else
1636 PUSH_TOKEN (CPP_PLUS);
1637 break;
45b966db 1638
041c3194
ZW
1639 case '=':
1640 /* This relies on equidistance of "?=" and "?" tokens. */
1641 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1642 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1643 else
1644 PUSH_TOKEN (CPP_EQ);
1645 break;
45b966db 1646
041c3194
ZW
1647 case '>':
1648 cur_token->type = CPP_GREATER;
1649 if (IMMED_TOKEN ())
1650 {
1651 if (PREV_TOKEN_TYPE == CPP_GREATER)
1652 BACKUP_TOKEN (CPP_RSHIFT);
1653 else if (PREV_TOKEN_TYPE == CPP_MINUS)
1654 BACKUP_TOKEN (CPP_DEREF);
1655 /* Digraph: ":>" is a ']' */
1656 else if (PREV_TOKEN_TYPE == CPP_COLON)
1657 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1658 /* Digraph: "%>" is a '}' */
1659 else if (PREV_TOKEN_TYPE == CPP_MOD)
1660 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1661 }
1662 cur_token++;
1663 break;
1664
1665 case '<':
1666 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1667 {
1668 REVISE_TOKEN (CPP_LSHIFT);
1669 break;
1670 }
1671 /* Is this the beginning of a header name? */
1672 if (IS_DIRECTIVE () && (list->directive->flags & INCL))
1673 {
1674 c = '>'; /* Terminator. */
1675 cur_token->type = CPP_HEADER_NAME;
1676 goto do_parse_string;
1677 }
1678 PUSH_TOKEN (CPP_LESS);
1679 break;
45b966db 1680
041c3194
ZW
1681 case '%':
1682 /* Digraph: "<%" is a '{' */
1683 cur_token->type = CPP_MOD;
1684 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1685 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1686 cur_token++;
1687 break;
04e3ec78 1688
041c3194
ZW
1689 case '?':
1690 if (cur + 1 < buffer->rlimit && *cur == '?'
1691 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1692 {
1693 /* Handle trigraph. */
1694 cur++;
1695 switch (*cur++)
1696 {
1697 case '(': goto make_open_square;
1698 case ')': goto make_close_square;
1699 case '<': goto make_open_brace;
1700 case '>': goto make_close_brace;
1701 case '=': goto make_hash;
1702 case '!': goto make_or;
1703 case '-': goto make_complement;
1704 case '/': goto make_backslash;
1705 case '\'': goto make_xor;
1706 }
1707 }
1708 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1709 {
1710 /* GNU C++ defines <? and >? operators. */
1711 if (PREV_TOKEN_TYPE == CPP_LESS)
1712 {
1713 REVISE_TOKEN (CPP_MIN);
1714 break;
1715 }
1716 else if (PREV_TOKEN_TYPE == CPP_GREATER)
1717 {
1718 REVISE_TOKEN (CPP_MAX);
1719 break;
1720 }
1721 }
1722 PUSH_TOKEN (CPP_QUERY);
1723 break;
45b966db 1724
041c3194
ZW
1725 case '.':
1726 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1727 && IMMED_TOKEN ()
1728 && !(cur_token[-1].flags & PREV_WHITE))
1729 {
1730 cur_token -= 2;
1731 PUSH_TOKEN (CPP_ELLIPSIS);
1732 }
1733 else
1734 PUSH_TOKEN (CPP_DOT);
1735 break;
c5a04734 1736
041c3194
ZW
1737 make_complement:
1738 case '~': PUSH_TOKEN (CPP_COMPL); break;
1739 make_xor:
1740 case '^': PUSH_TOKEN (CPP_XOR); break;
1741 make_open_brace:
1742 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1743 make_close_brace:
1744 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1745 make_open_square:
1746 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1747 make_close_square:
1748 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1749 make_backslash:
1750 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1751 case '!': PUSH_TOKEN (CPP_NOT); break;
1752 case ',': PUSH_TOKEN (CPP_COMMA); break;
1753 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1754 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1755 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
6d2c2047 1756
041c3194
ZW
1757 case '$':
1758 if (CPP_OPTION (pfile, dollars_in_ident))
1759 goto letter;
1760 /* Fall through */
1761 other:
1762 default:
1763 cur_token->val.aux = c;
1764 PUSH_TOKEN (CPP_OTHER);
1765 break;
1766 }
1767 }
6d2c2047 1768
041c3194
ZW
1769 /* Run out of token space? */
1770 if (cur_token == token_limit)
1771 {
1772 list->tokens_used = cur_token - list->tokens;
1773 _cpp_expand_token_space (list, 256);
1774 goto expanded;
1775 }
6d2c2047 1776
041c3194
ZW
1777 cur_token->flags = flags;
1778 if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1779 {
1780 if (cur > buffer->buf && !IS_NEWLINE (cur[-1]))
1781 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1782 CPP_BUF_COLUMN (buffer, cur),
1783 "no newline at end of file");
1784 cur_token++->type = CPP_EOF;
1785 }
6d2c2047 1786
041c3194
ZW
1787 out:
1788 /* All tokens are allocated, so the memory location is fixed. */
1789 first = &list->tokens[first_token];
1790
1791 /* Don't complain about the null directive, nor directives in
1792 assembly source: we don't know where the comments are, and # may
1793 introduce assembler pseudo-ops. Don't complain about invalid
1794 directives in skipped conditional groups (6.10 p4). */
1795 if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1796 && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1797 {
1798 if (first[1].type == CPP_NAME)
1799 cpp_error (pfile, "invalid preprocessing directive #%.*s",
1800 (int) first[1].val.name.len, first[1].val.name.text);
1801 else
1802 cpp_error (pfile, "invalid preprocessing directive");
1803 }
1804
1805 /* Put EOF at end of directives. This covers "directives do not
1806 extend beyond the end of the line (description 6.10 part 2)". */
1807 if (IS_DIRECTIVE () || !pfile->done_initializing)
1808 {
1809 pfile->first_directive_token = first;
1810 cur_token++->type = CPP_EOF;
1811 }
1812
1813 if (first_token == 0 || IS_DIRECTIVE ())
1814 /* Set beginning of line flag. */
1815 first->flags |= BOL;
6d2c2047 1816 else
041c3194
ZW
1817 /* 6.10.3.10: Within the sequence of preprocessing tokens making
1818 up the invocation of a function-like macro, new line is
1819 considered a normal white-space character. */
1820 first->flags |= PREV_WHITE;
1821
1822 buffer->cur = cur;
1823 list->tokens_used = cur_token - list->tokens;
1824 pfile->in_lex_line = 0;
6d2c2047
ZW
1825}
1826
041c3194
ZW
1827/* Write the spelling of a token TOKEN, with any appropriate
1828 whitespace before it, to the token_buffer. PREV is the previous
1829 token, which is used to determine if we need to shove in an extra
1830 space in order to avoid accidental token paste. */
1831static void
1832output_token (pfile, token, prev)
1833 cpp_reader *pfile;
1834 const cpp_token *token, *prev;
1835{
1836 int dummy;
c5a04734 1837
041c3194
ZW
1838 if (token->col && (token->flags & BOL))
1839 {
1840 /* Supply enough whitespace to put this token in its original
1841 column. Don't bother trying to reconstruct tabs; we can't
1842 get it right in general, and nothing ought to care. (Yes,
1843 some things do care; the fault lies with them.) */
1844 unsigned char *buffer;
1845 unsigned int spaces = token->col - 1;
1846
1847 CPP_RESERVE (pfile, token->col);
1848 buffer = pfile->limit;
1849
1850 while (spaces--)
1851 *buffer++ = ' ';
1852 pfile->limit = buffer;
1853 }
1854 else if (token->flags & PREV_WHITE)
1855 CPP_PUTC (pfile, ' ');
1856 /* Check for and prevent accidental token pasting, in ANSI mode. */
d6d5f795 1857
041c3194
ZW
1858 else if (!CPP_TRADITIONAL (pfile) && prev)
1859 {
1860 if (can_paste (pfile, prev, token, &dummy) != CPP_EOF)
1861 CPP_PUTC (pfile, ' ');
1862 /* can_paste catches most of the accidental paste cases, but not all.
1863 Consider a + ++b - if there is not a space between the + and ++, it
1864 will be misparsed as a++ + b. */
1865 else if ((prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1866 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS))
1867 CPP_PUTC (pfile, ' ');
1868 }
d6d5f795 1869
041c3194
ZW
1870 CPP_RESERVE (pfile, TOKEN_LEN (token));
1871 pfile->limit = spell_token (pfile, token, pfile->limit);
1872}
d6d5f795 1873
041c3194
ZW
1874/* Write the spelling of a token TOKEN to BUFFER. The buffer must
1875 already contain the enough space to hold the token's spelling. If
1876 WHITESPACE is true, and the token was preceded by whitespace,
1877 output a single space before the token proper. Returns a pointer
1878 to the character after the last character written. */
d6d5f795 1879
041c3194
ZW
1880static unsigned char *
1881spell_token (pfile, token, buffer)
1882 cpp_reader *pfile; /* Would be nice to be rid of this... */
1883 const cpp_token *token;
1884 unsigned char *buffer;
1885{
1886 switch (token_spellings[token->type].type)
1887 {
1888 case SPELL_OPERATOR:
1889 {
1890 const unsigned char *spelling;
1891 unsigned char c;
d6d5f795 1892
041c3194
ZW
1893 if (token->flags & DIGRAPH)
1894 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1895 else
1896 spelling = token_spellings[token->type].spelling;
1897
1898 while ((c = *spelling++) != '\0')
1899 *buffer++ = c;
1900 }
1901 break;
d6d5f795 1902
041c3194
ZW
1903 case SPELL_IDENT:
1904 memcpy (buffer, token->val.name.text, token->val.name.len);
1905 buffer += token->val.name.len;
1906 break;
d6d5f795 1907
041c3194
ZW
1908 case SPELL_STRING:
1909 {
1910 unsigned char c;
d6d5f795 1911
041c3194
ZW
1912 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1913 *buffer++ = 'L';
1914 c = '\'';
1915 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1916 c = '"';
1917 *buffer++ = c;
1918 memcpy (buffer, token->val.name.text, token->val.name.len);
1919 buffer += token->val.name.len;
1920 *buffer++ = c;
1921 }
1922 break;
d6d5f795 1923
041c3194
ZW
1924 case SPELL_CHAR:
1925 *buffer++ = token->val.aux;
1926 break;
d6d5f795 1927
041c3194
ZW
1928 case SPELL_NONE:
1929 cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
1930 break;
1931 }
d6d5f795 1932
041c3194
ZW
1933 return buffer;
1934}
d6d5f795 1935
041c3194 1936/* Macro expansion algorithm. TODO. */
d6d5f795 1937
7de9cc38
KG
1938static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
1939static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
c5a04734 1940
041c3194
ZW
/* Non-zero if context C is a macro-argument context.  */
#define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)

/* The context on top of PFILE's context stack.  */
#define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)

/* Flags for cpp_context.  */
#define CONTEXT_PASTEL	(1 << 0) /* An argument context on LHS of ##.  */
#define CONTEXT_PASTER	(1 << 1) /* An argument context on RHS of ##.  */
#define CONTEXT_RAW	(1 << 2) /* If argument tokens already expanded.  */
#define CONTEXT_ARG	(1 << 3) /* If an argument context.  */

/* Give token D the PREV_WHITE, BOL and PASTE_LEFT flags of token S
   (all other flags of D are cleared).  If S is at the beginning of a
   line, D also takes over its column and line.  */
#define ASSIGN_FLAGS_AND_POS(d, s) \
  do \
    { \
      (d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
      if ((d)->flags & BOL) \
	{ \
	  (d)->col = (s)->col; \
	  (d)->line = (s)->line; \
	} \
    } \
  while (0)

/* Replace the PREV_WHITE and BOL flags of token D with F, leaving its
   other flags alone.  F is flags, just consisting of PREV_WHITE | BOL.
   If F includes BOL, D also takes the column and line of token S.
   Note that F is evaluated more than once.  */
#define MODIFY_FLAGS_AND_POS(d, s, f) \
  do \
    { \
      (d)->flags &= ~(PREV_WHITE | BOL); \
      (d)->flags |= (f); \
      if ((f) & BOL) \
	{ \
	  (d)->col = (s)->col; \
	  (d)->line = (s)->line; \
	} \
    } \
  while (0)
1960
1961typedef struct cpp_context cpp_context;
1962struct cpp_context
1963{
1964 union
1965 {
1966 const cpp_toklist *list; /* Used for macro contexts only. */
1967 const cpp_token **arg; /* Used for arg contexts only. */
1968 } u;
1969
1970 /* Pushed token to be returned by next call to cpp_get_token. */
1971 const cpp_token *pushed_token;
1972
1973 struct macro_args *args; /* 0 for arguments and object-like macros. */
1974 unsigned short posn; /* Current posn, index into u. */
1975 unsigned short count; /* No. of tokens in u. */
1976 unsigned short level;
1977 unsigned char flags;
1978};
1979
1980typedef struct macro_args macro_args;
1981struct macro_args
1982{
1983 unsigned int *ends;
1984 const cpp_token **tokens;
1985 unsigned int capacity;
1986 unsigned int used;
1987 unsigned short level;
1988};
1989
1990static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
1991static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
1992 macro_args *, unsigned int *));
1993static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
1994static void save_token PARAMS ((macro_args *, const cpp_token *));
1995static const cpp_token *push_arg_context PARAMS ((cpp_reader *,
1996 const cpp_token *));
1997static int do_pop_context PARAMS ((cpp_reader *));
1998static const cpp_token *pop_context PARAMS ((cpp_reader *));
1999static const cpp_token *push_macro_context PARAMS ((cpp_reader *,
2000 cpp_hashnode *,
2001 const cpp_token *));
2002static void free_macro_args PARAMS ((macro_args *));
2003
2004/* Free the storage allocated for macro arguments. */
2005static void
2006free_macro_args (args)
2007 macro_args *args;
c5a04734 2008{
041c3194
ZW
2009 if (args->tokens)
2010 free (args->tokens);
2011 free (args->ends);
2012 free (args);
c5a04734
ZW
2013}
2014
041c3194
ZW
2015/* Determines if a macro has been already used (and is therefore
2016 disabled). */
c5a04734 2017static int
041c3194 2018is_macro_disabled (pfile, expansion, token)
c5a04734 2019 cpp_reader *pfile;
041c3194
ZW
2020 const cpp_toklist *expansion;
2021 const cpp_token *token;
c5a04734 2022{
041c3194
ZW
2023 cpp_context *context = CURRENT_CONTEXT (pfile);
2024
2025 /* Arguments on either side of ## are inserted in place without
2026 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
2027 occurs during a later rescan pass. The effect is that we expand
2028 iff we would as part of the macro's expansion list, so we should
2029 drop to the macro's context. */
2030 if (IS_ARG_CONTEXT (context))
c5a04734 2031 {
041c3194
ZW
2032 if (token->flags & PASTED)
2033 context--;
2034 else if (!(context->flags & CONTEXT_RAW))
2035 return 1;
2036 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2037 context--;
c5a04734 2038 }
c5a04734 2039
041c3194
ZW
2040 /* Have we already used this macro? */
2041 while (context->level > 0)
c5a04734 2042 {
041c3194
ZW
2043 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2044 return 1;
2045 /* Raw argument tokens are judged based on the token list they
2046 came from. */
2047 if (context->flags & CONTEXT_RAW)
2048 context = pfile->contexts + context->level;
2049 else
2050 context--;
2051 }
c5a04734 2052
041c3194
ZW
2053 /* Function-like macros may be disabled if the '(' is not in the
2054 current context. We check this without disrupting the context
2055 stack. */
2056 if (expansion->paramc >= 0)
2057 {
2058 const cpp_token *next;
2059 unsigned int prev_nme;
c5a04734 2060
041c3194
ZW
2061 context = CURRENT_CONTEXT (pfile);
2062 /* Drop down any contexts we're at the end of: the '(' may
2063 appear in lower macro expansions, or in the rest of the file. */
2064 while (context->posn == context->count && context > pfile->contexts)
2065 {
2066 context--;
2067 /* If we matched, we are disabled, as we appear in the
2068 expansion of each macro we meet. */
2069 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2070 return 1;
2071 }
c5a04734 2072
041c3194
ZW
2073 prev_nme = pfile->no_expand_level;
2074 pfile->no_expand_level = context - pfile->contexts;
2075 next = cpp_get_token (pfile);
2076 restore_macro_expansion (pfile, prev_nme);
2077 if (next->type != CPP_OPEN_PAREN)
2078 {
2079 _cpp_push_token (pfile, next);
2080 if (CPP_OPTION (pfile, warn_traditional))
2081 cpp_warning (pfile,
2082 "function macro %.*s must be used with arguments in traditional C",
2083 (int) token->val.name.len, token->val.name.text);
2084 return 1;
2085 }
c5a04734 2086 }
c5a04734 2087
041c3194 2088 return 0;
c5a04734
ZW
2089}
2090
041c3194
ZW
2091/* Add a token to the set of tokens forming the arguments to the macro
2092 being parsed in parse_args. */
2093static void
2094save_token (args, token)
2095 macro_args *args;
2096 const cpp_token *token;
c5a04734 2097{
041c3194
ZW
2098 if (args->used == args->capacity)
2099 {
2100 args->capacity += args->capacity + 100;
2101 args->tokens = (const cpp_token **)
2102 xrealloc (args->tokens, args->capacity * sizeof (const cpp_token *));
2103 }
2104 args->tokens[args->used++] = token;
c5a04734
ZW
2105}
2106
041c3194
ZW
2107/* Take and save raw tokens until we finish one argument. Empty
2108 arguments are saved as a single CPP_PLACEMARKER token. */
2109static const cpp_token *
2110parse_arg (pfile, var_args, paren_context, args, pcount)
c5a04734 2111 cpp_reader *pfile;
041c3194
ZW
2112 int var_args;
2113 unsigned int paren_context;
2114 macro_args *args;
2115 unsigned int *pcount;
c5a04734 2116{
041c3194
ZW
2117 const cpp_token *token;
2118 unsigned int paren = 0, count = 0;
2119 int raw, was_raw = 1;
c5a04734 2120
041c3194 2121 for (count = 0;; count++)
c5a04734 2122 {
041c3194 2123 token = cpp_get_token (pfile);
c5a04734 2124
041c3194 2125 switch (token->type)
c5a04734 2126 {
041c3194
ZW
2127 default:
2128 break;
c5a04734 2129
041c3194
ZW
2130 case CPP_OPEN_PAREN:
2131 paren++;
2132 break;
2133
2134 case CPP_CLOSE_PAREN:
2135 if (paren-- != 0)
2136 break;
2137 goto out;
2138
2139 case CPP_COMMA:
2140 /* Commas are not terminators within parantheses or var_args. */
2141 if (paren || var_args)
2142 break;
2143 goto out;
2144
2145 case CPP_EOF: /* Error reported by caller. */
2146 goto out;
c5a04734 2147 }
c5a04734 2148
041c3194
ZW
2149 raw = pfile->cur_context <= paren_context;
2150 if (raw != was_raw)
2151 {
2152 was_raw = raw;
2153 save_token (args, 0);
2154 count++;
c5a04734 2155 }
041c3194 2156 save_token (args, token);
c5a04734 2157 }
c5a04734
ZW
2158
2159 out:
041c3194
ZW
2160 if (count == 0)
2161 {
2162 /* Duplicate the placemarker. Then we can set its flags and
2163 position and safely be using more than one. */
2164 save_token (args, duplicate_token (pfile, &placemarker_token));
2165 count++;
2166 }
2167
2168 *pcount = count;
2169 return token;
c5a04734
ZW
2170}
2171
041c3194
ZW
/* Non-zero if the argument starting at offset O of argument list A is
   empty, i.e. it is either a lone CPP_PLACEMARKER token or a null
   pointer followed by a CPP_PLACEMARKER token.  */

#define empty_argument(A, O) \
  ((A)->tokens[O] \
   ? (A)->tokens[O]->type == CPP_PLACEMARKER \
   : (A)->tokens[(O)+1]->type == CPP_PLACEMARKER)
2179
2180/* Parse the arguments making up a macro invocation. Nested arguments
2181 are automatically macro expanded, but immediate macros are not
2182 expanded; this enables e.g. operator # to work correctly. Returns
2183 non-zero on error. */
c5a04734 2184static int
041c3194 2185parse_args (pfile, hp, args)
c5a04734 2186 cpp_reader *pfile;
041c3194
ZW
2187 cpp_hashnode *hp;
2188 macro_args *args;
c5a04734 2189{
041c3194
ZW
2190 const cpp_token *token;
2191 const cpp_toklist *macro;
2192 unsigned int total = 0;
2193 unsigned int paren_context = pfile->cur_context;
2194 int argc = 0;
2195
2196 macro = hp->value.expansion;
2197 do
c5a04734 2198 {
041c3194 2199 unsigned int count;
c5a04734 2200
041c3194
ZW
2201 token = parse_arg (pfile, (argc + 1 == macro->paramc
2202 && (macro->flags & VAR_ARGS)),
2203 paren_context, args, &count);
2204 if (argc < macro->paramc)
c5a04734 2205 {
041c3194
ZW
2206 total += count;
2207 args->ends[argc] = total;
c5a04734 2208 }
041c3194 2209 argc++;
c5a04734 2210 }
041c3194 2211 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
c5a04734 2212
041c3194
ZW
2213 if (token->type == CPP_EOF)
2214 {
2215 cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
2216 hp->length, hp->name);
2217 return 1;
2218 }
2219 else if (argc < macro->paramc)
2220 {
2221 /* A rest argument is allowed to not appear in the invocation at all.
2222 e.g. #define debug(format, args...) ...
2223 debug("string");
2224 This is exactly the same as if the rest argument had received no
2225 tokens - debug("string",); */
2226
2227 if (argc + 1 == macro->paramc && (macro->flags & VAR_ARGS))
2228 {
2229 /* Duplicate the placemarker. Then we can set its flags and
2230 position and safely be using more than one. */
2231 save_token (args, duplicate_token (pfile, &placemarker_token));
2232 args->ends[argc] = total + 1;
2233 return 0;
2234 }
2235 else
2236 {
2237 cpp_error (pfile,
2238 "insufficient arguments in invocation of macro \"%.*s\"",
2239 hp->length, hp->name);
2240 return 1;
2241 }
2242 }
2243 /* An empty argument to an empty function-like macro is fine. */
2244 else if (argc > macro->paramc
2245 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2246 {
2247 cpp_error (pfile,
2248 "too many arguments in invocation of macro \"%.*s\"",
2249 hp->length, hp->name);
2250 return 1;
2251 }
c5a04734 2252
041c3194
ZW
2253 return 0;
2254}
2255
2256/* Adds backslashes before all backslashes and double quotes appearing
2257 in strings. Non-printable characters are converted to octal. */
2258static U_CHAR *
2259quote_string (dest, src, len)
2260 U_CHAR *dest;
2261 const U_CHAR *src;
2262 unsigned int len;
2263{
2264 while (len--)
c5a04734 2265 {
041c3194 2266 U_CHAR c = *src++;
c5a04734 2267
041c3194 2268 if (c == '\\' || c == '"')
6ab3e7dd 2269 {
041c3194
ZW
2270 *dest++ = '\\';
2271 *dest++ = c;
2272 }
2273 else
2274 {
2275 if (ISPRINT (c))
2276 *dest++ = c;
2277 else
2278 {
2279 sprintf ((char *) dest, "\\%03o", c);
2280 dest += 4;
2281 }
6ab3e7dd 2282 }
c5a04734 2283 }
c5a04734 2284
041c3194 2285 return dest;
c5a04734
ZW
2286}
2287
041c3194
ZW
2288/* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2289 CPP_STRING token containing TEXT in quoted form. */
2290static cpp_token *
2291make_string_token (token, text, len)
2292 cpp_token *token;
2293 const U_CHAR *text;
2294 unsigned int len;
2295{
2296 U_CHAR *buf;
2297
2298 buf = (U_CHAR *) xmalloc (len * 4);
2299 token->type = CPP_STRING;
2300 token->flags = 0;
2301 token->val.name.text = buf;
2302 token->val.name.len = quote_string (buf, text, len) - buf;
2303 return token;
2304}
2305
2306/* Allocates and converts a temporary token to a CPP_NUMBER token,
2307 evaluating to NUMBER. */
2308static cpp_token *
2309alloc_number_token (pfile, number)
c5a04734 2310 cpp_reader *pfile;
041c3194 2311 int number;
c5a04734 2312{
041c3194
ZW
2313 cpp_token *result;
2314 char *buf;
2315
2316 result = get_temp_token (pfile);
2317 buf = xmalloc (20);
2318 sprintf (buf, "%d", number);
2319
2320 result->type = CPP_NUMBER;
2321 result->flags = 0;
2322 result->val.name.text = (U_CHAR *) buf;
2323 result->val.name.len = strlen (buf);
2324 return result;
2325}
c5a04734 2326
041c3194
ZW
2327/* Returns a temporary token from the temporary token store of PFILE. */
2328static cpp_token *
2329get_temp_token (pfile)
2330 cpp_reader *pfile;
2331{
2332 if (pfile->temp_used == pfile->temp_alloced)
2333 {
2334 if (pfile->temp_used == pfile->temp_cap)
2335 {
2336 pfile->temp_cap += pfile->temp_cap + 20;
2337 pfile->temp_tokens = (cpp_token **) xrealloc
2338 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2339 }
2340 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2341 (sizeof (cpp_token));
2342 }
c5a04734 2343
041c3194
ZW
2344 return pfile->temp_tokens[pfile->temp_used++];
2345}
2346
2347/* Release (not free) for re-use the temporary tokens of PFILE. */
2348static void
2349release_temp_tokens (pfile)
2350 cpp_reader *pfile;
2351{
2352 while (pfile->temp_used)
c5a04734 2353 {
041c3194 2354 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
c5a04734 2355
041c3194 2356 if (token_spellings[token->type].type > SPELL_NONE)
c5a04734 2357 {
041c3194
ZW
2358 free ((char *) token->val.name.text);
2359 token->val.name.text = 0;
c5a04734
ZW
2360 }
2361 }
041c3194 2362}
c5a04734 2363
041c3194
ZW
2364/* Free all of PFILE's dynamically-allocated temporary tokens. */
2365void
2366_cpp_free_temp_tokens (pfile)
2367 cpp_reader *pfile;
2368{
2369 if (pfile->temp_tokens)
c5a04734 2370 {
041c3194
ZW
2371 /* It is possible, though unlikely (looking for '(' of a funlike
2372 macro into EOF), that we haven't released the tokens yet. */
2373 release_temp_tokens (pfile);
2374 while (pfile->temp_alloced)
2375 free (pfile->temp_tokens[--pfile->temp_alloced]);
2376 free (pfile->temp_tokens);
c5a04734
ZW
2377 }
2378
041c3194
ZW
2379 if (pfile->date)
2380 {
2381 free ((char *) pfile->date->val.name.text);
2382 free (pfile->date);
2383 free ((char *) pfile->time->val.name.text);
2384 free (pfile->time);
2385 }
c5a04734
ZW
2386}
2387
041c3194
ZW
2388/* Copy TOKEN into a temporary token from PFILE's store. */
2389static cpp_token *
2390duplicate_token (pfile, token)
2391 cpp_reader *pfile;
2392 const cpp_token *token;
2393{
2394 cpp_token *result = get_temp_token (pfile);
c5a04734 2395
041c3194
ZW
2396 *result = *token;
2397 if (token_spellings[token->type].type > SPELL_NONE)
2398 {
2399 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.name.len);
2400 memcpy (buff, token->val.name.text, token->val.name.len);
2401 result->val.name.text = buff;
2402 }
2403 return result;
2404}
c5a04734 2405
041c3194
ZW
2406/* Determine whether two tokens can be pasted together, and if so,
2407 what the resulting token is. Returns CPP_EOF if the tokens cannot
2408 be pasted, or the appropriate type for the merged token if they
2409 can. */
2410static enum cpp_ttype
2411can_paste (pfile, token1, token2, digraph)
2412 cpp_reader * pfile;
2413 const cpp_token *token1, *token2;
2414 int* digraph;
c5a04734 2415{
041c3194
ZW
2416 enum cpp_ttype a = token1->type, b = token2->type;
2417 int cxx = CPP_OPTION (pfile, cplusplus);
c5a04734 2418
041c3194
ZW
2419 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2420 return a + (CPP_EQ_EQ - CPP_EQ);
c5a04734 2421
041c3194 2422 switch (a)
c5a04734 2423 {
041c3194
ZW
2424 case CPP_GREATER:
2425 if (b == a) return CPP_RSHIFT;
2426 if (b == CPP_QUERY && cxx) return CPP_MAX;
2427 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
2428 break;
2429 case CPP_LESS:
2430 if (b == a) return CPP_LSHIFT;
2431 if (b == CPP_QUERY && cxx) return CPP_MIN;
2432 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
2433 if (b == CPP_COLON)
2434 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2435 if (b == CPP_MOD)
2436 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
2437 break;
c5a04734 2438
041c3194
ZW
2439 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2440 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2441 case CPP_OR: if (b == a) return CPP_OR_OR; break;
c5a04734 2442
041c3194
ZW
2443 case CPP_MINUS:
2444 if (b == a) return CPP_MINUS_MINUS;
2445 if (b == CPP_GREATER) return CPP_DEREF;
2446 break;
2447 case CPP_COLON:
2448 if (b == a && cxx) return CPP_SCOPE;
2449 if (b == CPP_GREATER)
2450 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2451 break;
2452
2453 case CPP_MOD:
2454 if (b == CPP_GREATER)
2455 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2456 if (b == CPP_COLON)
2457 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2458 break;
2459 case CPP_DEREF:
2460 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2461 break;
2462 case CPP_DOT:
2463 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2464 if (b == CPP_NUMBER) return CPP_NUMBER;
2465 break;
2466
2467 case CPP_HASH:
2468 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2469 /* %:%: digraph */
2470 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2471 break;
2472
2473 case CPP_NAME:
2474 if (b == CPP_NAME) return CPP_NAME;
2475 if (b == CPP_NUMBER
2476 && is_numstart(token2->val.name.text[0])) return CPP_NAME;
2477 if (b == CPP_CHAR
2478 && token1->val.name.len == 1
2479 && token1->val.name.text[0] == 'L') return CPP_WCHAR;
2480 if (b == CPP_STRING
2481 && token1->val.name.len == 1
2482 && token1->val.name.text[0] == 'L') return CPP_WSTRING;
2483 break;
2484
2485 case CPP_NUMBER:
2486 if (b == CPP_NUMBER) return CPP_NUMBER;
2487 if (b == CPP_NAME) return CPP_NUMBER;
2488 if (b == CPP_DOT) return CPP_NUMBER;
2489 /* Numbers cannot have length zero, so this is safe. */
2490 if ((b == CPP_PLUS || b == CPP_MINUS)
2491 && VALID_SIGN ('+', token1->val.name.text[token1->val.name.len - 1]))
2492 return CPP_NUMBER;
2493 break;
2494
2495 default:
2496 break;
c5a04734
ZW
2497 }
2498
041c3194
ZW
2499 return CPP_EOF;
2500}
2501
2502/* Check if TOKEN is to be ##-pasted with the token after it. */
2503static const cpp_token *
2504maybe_paste_with_next (pfile, token)
2505 cpp_reader *pfile;
2506 const cpp_token *token;
2507{
2508 cpp_token *pasted;
2509 const cpp_token *second;
2510 cpp_context *context = CURRENT_CONTEXT (pfile);
2511
2512 /* Is this token on the LHS of ## ? */
2513 if (!((context->flags & CONTEXT_PASTEL) && context->posn == context->count)
2514 && !(token->flags & PASTE_LEFT))
2515 return token;
2516
2517 /* Prevent recursion, and possibly pushing back more than one token. */
2518 if (pfile->paste_level)
2519 return token;
2520
2521 /* Suppress macro expansion for next token, but don't conflict with
2522 the other method of suppression. If it is an argument, macro
2523 expansion within the argument will still occur. */
2524 pfile->paste_level = pfile->cur_context;
2525 second = cpp_get_token (pfile);
2526 pfile->paste_level = 0;
2527
2528 /* Ignore placemarker argument tokens. */
2529 if (token->type == CPP_PLACEMARKER)
2530 pasted = duplicate_token (pfile, second);
2531 else if (second->type == CPP_PLACEMARKER)
c5a04734 2532 {
041c3194
ZW
2533 /* GCC has special extended semantics for a ## b where b is a
2534 varargs parameter: a disappears if b consists of no tokens.
2535 This extension is deprecated. */
2536 if (token->flags & GNU_VARARGS)
2537 {
2538 cpp_warning (pfile, "deprecated GNU ## extension used");
2539 pasted = duplicate_token (pfile, second);
2540 }
2541 else
2542 pasted = duplicate_token (pfile, token);
c5a04734 2543 }
041c3194
ZW
2544 else
2545 {
2546 int digraph = 0;
2547 enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
c5a04734 2548
041c3194
ZW
2549 if (type == CPP_EOF)
2550 {
2551 if (CPP_OPTION (pfile, warn_paste))
2552 cpp_warning (pfile,
2553 "pasting would not give a valid preprocessing token");
2554 _cpp_push_token (pfile, second);
2555 return token;
2556 }
c5a04734 2557
041c3194
ZW
2558 if (type == CPP_NAME || type == CPP_NUMBER)
2559 {
2560 /* Join spellings. */
2561 U_CHAR *buff, *buff2;
c5a04734 2562
041c3194
ZW
2563 pasted = get_temp_token (pfile);
2564 buff = (U_CHAR *) xmalloc (TOKEN_LEN (token) + TOKEN_LEN (second));
2565 buff2 = spell_token (pfile, token, buff);
2566 buff2 = spell_token (pfile, second, buff2);
2567
2568 pasted->val.name.text = buff;
2569 pasted->val.name.len = buff2 - buff;
2570 }
2571 else if (type == CPP_WCHAR || type == CPP_WSTRING)
2572 pasted = duplicate_token (pfile, second);
2573 else
2574 {
2575 pasted = get_temp_token (pfile);
2576 pasted->val.integer = 0;
2577 }
2578
2579 pasted->type = type;
2580 pasted->flags = digraph ? DIGRAPH: 0;
2581 }
2582
2583 /* The pasted token gets the whitespace flags and position of the
2584 first token, the PASTE_LEFT flag of the second token, plus the
2585 PASTED flag to indicate it is the result of a paste. However, we
2586 want to preserve the DIGRAPH flag. */
2587 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2588 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2589 | (second->flags & PASTE_LEFT) | PASTED);
2590 pasted->col = token->col;
2591 pasted->line = token->line;
2592
2593 return maybe_paste_with_next (pfile, pasted);
2594}
2595
2596/* Convert a token sequence to a single string token according to the
2597 rules of the ISO C #-operator. */
2598#define INIT_SIZE 200
2599static cpp_token *
2600stringify_arg (pfile, token)
c5a04734 2601 cpp_reader *pfile;
041c3194 2602 const cpp_token *token;
c5a04734 2603{
041c3194
ZW
2604 cpp_token *result;
2605 unsigned char *main_buf;
2606 unsigned int prev_value, backslash_count = 0;
2607 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
c5a04734 2608
041c3194
ZW
2609 prev_value = prevent_macro_expansion (pfile);
2610 main_buf = (unsigned char *) xmalloc (buf_cap);
c5a04734 2611
041c3194
ZW
2612 result = get_temp_token (pfile);
2613 ASSIGN_FLAGS_AND_POS (result, token);
2614
2615 for (; (token = cpp_get_token (pfile))->type != CPP_EOF; )
c5a04734 2616 {
041c3194
ZW
2617 int escape;
2618 unsigned char *buf;
2619 unsigned int len = TOKEN_LEN (token);
c5a04734 2620
041c3194
ZW
2621 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2622 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2623 if (escape)
2624 len *= 4 + 1;
2625
2626 if (buf_used + len > buf_cap)
c5a04734 2627 {
041c3194
ZW
2628 buf_cap = buf_used + len + INIT_SIZE;
2629 main_buf = xrealloc (main_buf, buf_cap);
2630 }
c5a04734 2631
041c3194
ZW
2632 if (whitespace && (token->flags & PREV_WHITE))
2633 main_buf[buf_used++] = ' ';
c5a04734 2634
041c3194
ZW
2635 if (escape)
2636 buf = (unsigned char *) xmalloc (len);
2637 else
2638 buf = main_buf + buf_used;
2639
2640 len = spell_token (pfile, token, buf) - buf;
2641 if (escape)
2642 {
2643 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2644 free (buf);
2645 }
2646 else
2647 buf_used += len;
c5a04734 2648
041c3194
ZW
2649 whitespace = 1;
2650 if (token->type == CPP_BACKSLASH)
2651 backslash_count++;
2652 else
2653 backslash_count = 0;
2654 }
c5a04734 2655
041c3194
ZW
2656 /* Ignore the final \ of invalid string literals. */
2657 if (backslash_count & 1)
2658 {
2659 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2660 buf_used--;
2661 }
c5a04734 2662
041c3194
ZW
2663 result->type = CPP_STRING;
2664 result->val.name.text = main_buf;
2665 result->val.name.len = buf_used;
2666 restore_macro_expansion (pfile, prev_value);
2667 return result;
2668}
c5a04734 2669
041c3194
ZW
2670/* Allocate more room on the context stack of PFILE. */
2671static void
2672expand_context_stack (pfile)
2673 cpp_reader *pfile;
2674{
2675 pfile->context_cap += pfile->context_cap + 20;
2676 pfile->contexts = (cpp_context *)
2677 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2678}
c5a04734 2679
041c3194
ZW
2680/* Push the context of macro NODE onto the context stack. TOKEN is
2681 the CPP_NAME token invoking the macro. */
2682static const cpp_token *
2683push_macro_context (pfile, node, token)
2684 cpp_reader *pfile;
2685 cpp_hashnode *node;
2686 const cpp_token *token;
2687{
2688 unsigned char orig_flags;
2689 macro_args *args;
2690 cpp_context *context;
c5a04734 2691
041c3194 2692 if (pfile->cur_context > CPP_STACK_MAX)
c5a04734 2693 {
041c3194
ZW
2694 cpp_error (pfile, "infinite macro recursion invoking '%s'", node->name);
2695 return token;
c5a04734
ZW
2696 }
2697
041c3194
ZW
2698 /* Token's flags may change when parsing args containing a nested
2699 invocation of this macro. */
2700 orig_flags = token->flags & (PREV_WHITE | BOL);
2701 args = 0;
2702 if (node->value.expansion->paramc >= 0)
c5a04734 2703 {
041c3194
ZW
2704 unsigned int error, prev_nme;
2705
2706 /* Allocate room for the argument contexts, and parse them. */
2707 args = (macro_args *) xmalloc (sizeof (macro_args));
2708 args->ends = (unsigned int *)
2709 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2710 args->tokens = 0;
2711 args->capacity = 0;
2712 args->used = 0;
2713 args->level = pfile->cur_context;
2714
2715 prev_nme = prevent_macro_expansion (pfile);
2716 pfile->args = args;
2717 error = parse_args (pfile, node, args);
2718 pfile->args = 0;
2719 restore_macro_expansion (pfile, prev_nme);
2720 if (error)
2721 {
2722 free_macro_args (args);
2723 return token;
2724 }
c5a04734 2725 }
c5a04734 2726
041c3194
ZW
2727 /* Now push its context. */
2728 pfile->cur_context++;
2729 if (pfile->cur_context == pfile->context_cap)
2730 expand_context_stack (pfile);
2731
2732 context = CURRENT_CONTEXT (pfile);
2733 context->u.list = node->value.expansion;
2734 context->args = args;
2735 context->posn = 0;
2736 context->count = context->u.list->tokens_used;
2737 context->level = pfile->cur_context;
2738 context->flags = 0;
2739 context->pushed_token = 0;
2740
2741 /* Set the flags of the first token. We know there must
2742 be one, empty macros are a single placemarker token. */
2743 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2744
2745 return cpp_get_token (pfile);
c5a04734
ZW
2746}
2747
041c3194
ZW
2748/* Push an argument to the current macro onto the context stack.
2749 TOKEN is the MACRO_ARG token representing the argument expansion. */
2750static const cpp_token *
2751push_arg_context (pfile, token)
2752 cpp_reader *pfile;
2753 const cpp_token *token;
c5a04734 2754{
041c3194
ZW
2755 cpp_context *context;
2756 macro_args *args;
2757
2758 pfile->cur_context++;
2759 if (pfile->cur_context == pfile->context_cap)
2760 expand_context_stack (pfile);
2761
2762 context = CURRENT_CONTEXT (pfile);
2763 args = context[-1].args;
2764
2765 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2766 context->u.arg = args->tokens + context->count;
2767 context->count = args->ends[token->val.aux] - context->count;
2768 context->args = 0;
2769 context->posn = 0;
2770 context->level = args->level;
2771 context->flags = CONTEXT_ARG | CONTEXT_RAW;
2772 context->pushed_token = 0;
2773
2774 /* Set the flags of the first token. There is one. */
2775 {
2776 const cpp_token *first = context->u.arg[0];
2777 if (!first)
2778 first = context->u.arg[1];
c5a04734 2779
041c3194
ZW
2780 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2781 token->flags & (PREV_WHITE | BOL));
2782 }
c5a04734 2783
041c3194
ZW
2784 if (token->flags & STRINGIFY_ARG)
2785 return stringify_arg (pfile, token);
c5a04734 2786
041c3194
ZW
2787 if (token->flags & PASTE_LEFT)
2788 context->flags |= CONTEXT_PASTEL;
2789 if (pfile->paste_level)
2790 context->flags |= CONTEXT_PASTER;
d1d9a6bd 2791
041c3194 2792 return get_raw_token (pfile);
c5a04734
ZW
2793}
2794
041c3194
ZW
2795/* "Unget" a token. It is effectively inserted in the token queue and
2796 will be returned by the next call to get_raw_token. */
c5a04734 2797void
041c3194 2798_cpp_push_token (pfile, token)
c5a04734 2799 cpp_reader *pfile;
041c3194 2800 const cpp_token *token;
c5a04734 2801{
041c3194
ZW
2802 cpp_context *context = CURRENT_CONTEXT (pfile);
2803 if (context->pushed_token)
2804 cpp_ice (pfile, "two tokens pushed in a row");
2805 if (token->type != CPP_EOF)
2806 context->pushed_token = token;
2807 /* Don't push back a directive's CPP_EOF, step back instead. */
2808 else if (pfile->cur_context == 0)
2809 pfile->contexts[0].posn--;
2810}
c5a04734 2811
041c3194
ZW
2812/* Handle a preprocessing directive. TOKEN is the CPP_HASH token
2813 introducing the directive. */
2814static void
2815process_directive (pfile, token)
2816 cpp_reader *pfile;
2817 const cpp_token *token;
2818{
2819 const struct directive *d = pfile->token_list.directive;
2820 int prev_nme = 0;
2821
2822 /* Skip over the directive name. */
2823 if (token[1].type == CPP_NAME)
2824 _cpp_get_raw_token (pfile);
2825 else if (token[1].type != CPP_NUMBER)
2826 cpp_ice (pfile, "directive begins with %s?!",
2827 token_names[token[1].type]);
2828
2829 /* Flush pending tokens at this point, in case the directive produces
2830 output. XXX Directive output won't be visible to a direct caller of
2831 cpp_get_token. */
2832 if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
2833 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
2834
2835 if (! (d->flags & EXPAND))
2836 prev_nme = prevent_macro_expansion (pfile);
2837 (void) (*d->handler) (pfile);
2838 if (! (d->flags & EXPAND))
2839 restore_macro_expansion (pfile, prev_nme);
2840 _cpp_skip_rest_of_line (pfile);
2841}
c5a04734 2842
041c3194
ZW
2843/* The external interface to return the next token. All macro
2844 expansion and directive processing is handled internally, the
2845 caller only ever sees the output after preprocessing. */
2846const cpp_token *
2847cpp_get_token (pfile)
2848 cpp_reader *pfile;
2849{
2850 const cpp_token *token;
2851 cpp_hashnode *node;
6ab3e7dd 2852
041c3194
ZW
2853 /* Loop till we hit a non-directive, non-skipped, non-placemarker token. */
2854 for (;;)
2855 {
2856 token = get_raw_token (pfile);
2857 if (token->flags & BOL && token->type == CPP_HASH
2858 && pfile->token_list.directive)
c5a04734 2859 {
041c3194
ZW
2860 process_directive (pfile, token);
2861 continue;
c5a04734
ZW
2862 }
2863
041c3194
ZW
2864 /* Short circuit EOF. */
2865 if (token->type == CPP_EOF)
2866 return token;
2867
2868 if (pfile->skipping && ! pfile->token_list.directive)
c5a04734 2869 {
041c3194
ZW
2870 _cpp_skip_rest_of_line (pfile);
2871 continue;
2872 }
2873 break;
2874 }
c5a04734 2875
041c3194
ZW
2876 /* If there's a potential control macro and we get here, then that
2877 #ifndef didn't cover the entire file and its argument shouldn't
2878 be taken as a control macro. */
2879 pfile->potential_control_macro = 0;
c5a04734 2880
041c3194 2881 token = maybe_paste_with_next (pfile, token);
c5a04734 2882
041c3194
ZW
2883 if (token->type != CPP_NAME)
2884 return token;
c5a04734 2885
041c3194
ZW
2886 /* Is macro expansion disabled in general? */
2887 if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
2888 return token;
2889
2890 node = cpp_lookup (pfile, token->val.name.text, token->val.name.len);
2891 if (node->type == T_VOID)
2892 return token;
c5a04734 2893
041c3194
ZW
2894 if (node->type == T_MACRO)
2895 {
2896 if (is_macro_disabled (pfile, node->value.expansion, token))
2897 return token;
c5a04734 2898
041c3194
ZW
2899 return push_macro_context (pfile, node, token);
2900 }
2901 else
2902 return special_symbol (pfile, node, token);
2903}
c5a04734 2904
041c3194
ZW
2905/* Returns the next raw token, i.e. without performing macro
2906 expansion. Argument contexts are automatically entered. */
2907static const cpp_token *
2908get_raw_token (pfile)
2909 cpp_reader *pfile;
2910{
2911 const cpp_token *result;
2912 cpp_context *context = CURRENT_CONTEXT (pfile);
c5a04734 2913
041c3194
ZW
2914 if (context->pushed_token)
2915 {
2916 result = context->pushed_token;
2917 context->pushed_token = 0;
2918 }
2919 else if (context->posn == context->count)
2920 result = pop_context (pfile);
2921 else
2922 {
2923 if (IS_ARG_CONTEXT (context))
2924 {
2925 result = context->u.arg[context->posn++];
2926 if (result == 0)
c5a04734 2927 {
041c3194
ZW
2928 context->flags ^= CONTEXT_RAW;
2929 result = context->u.arg[context->posn++];
c5a04734 2930 }
041c3194
ZW
2931 return result; /* Cannot be a CPP_MACRO_ARG */
2932 }
2933 result = &context->u.list->tokens[context->posn++];
2934 }
c5a04734 2935
041c3194
ZW
2936 if (result->type == CPP_MACRO_ARG)
2937 result = push_arg_context (pfile, result);
2938 return result;
2939}
c5a04734 2940
041c3194
ZW
2941/* Internal interface to get the token without macro expanding. */
2942const cpp_token *
2943_cpp_get_raw_token (pfile)
2944 cpp_reader *pfile;
2945{
2946 int prev_nme = prevent_macro_expansion (pfile);
2947 const cpp_token *result = cpp_get_token (pfile);
2948 restore_macro_expansion (pfile, prev_nme);
2949 return result;
2950}
c5a04734 2951
041c3194
ZW
2952/* A thin wrapper to lex_line. CLEAR is non-zero if the current token
2953 list should be overwritten, or zero if we need to append
2954 (typically, if we are within the arguments to a macro, or looking
2955 for the '(' to start a function-like macro invocation). */
2956static int
2957lex_next (pfile, clear)
2958 cpp_reader *pfile;
2959 int clear;
2960{
2961 cpp_toklist *list = &pfile->token_list;
2962 const cpp_token *old_list = list->tokens;
2963 unsigned int old_used = list->tokens_used;
c5a04734 2964
041c3194 2965 if (clear)
c5a04734 2966 {
041c3194
ZW
2967 /* Release all temporary tokens. */
2968 _cpp_clear_toklist (list);
2969 pfile->contexts[0].posn = 0;
2970 if (pfile->temp_used)
2971 release_temp_tokens (pfile);
c5a04734 2972 }
041c3194
ZW
2973 else
2974 {
2975 /* If we are currently processing a directive, do not advance.
2976 (6.10 paragraph 2: A new-line character ends the directive
2977 even if it occurs within what would otherwise be an
2978 invocation of a function-like macro.) */
2979 if (list->directive)
2980 return 1;
2981 }
2982
2983 lex_line (pfile, list);
2984 pfile->contexts[0].count = list->tokens_used;
c5a04734 2985
041c3194 2986 if (!clear && pfile->args)
c5a04734 2987 {
041c3194
ZW
2988 /* Fix up argument token pointers. */
2989 if (old_list != list->tokens)
2990 {
2991 unsigned int i;
2992
2993 for (i = 0; i < pfile->args->used; i++)
2994 {
2995 const cpp_token *token = pfile->args->tokens[i];
2996 if (token >= old_list && token < old_list + old_used)
2997 pfile->args->tokens[i] = (const cpp_token *)
2998 ((char *) token + ((char *) list->tokens - (char *) old_list));
2999 }
3000 }
3001
3002 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3003 tokens within the list of arguments that would otherwise act as
3004 preprocessing directives, the behavior is undefined.
3005
3006 This implementation will report a hard error and treat the
3007 'sequence of preprocessing tokens' as part of the macro argument,
3008 not a directive.
3009
3010 Note if pfile->args == 0, we're OK since we're only inside a
3011 macro argument after a '('. */
3012 if (list->directive)
3013 {
3014 cpp_error_with_line (pfile, list->tokens[old_used].line,
3015 list->tokens[old_used].col,
3016 "#%s may not be used inside a macro argument",
3017 list->directive->name);
3018 /* Don't treat as a directive: clear list->directive,
3019 prune the final EOF from the list. */
3020 list->directive = 0;
3021 list->tokens_used--;
3022 pfile->contexts[0].count--;
3023 }
c5a04734
ZW
3024 }
3025
041c3194 3026 return 0;
c5a04734
ZW
3027}
3028
041c3194
ZW
3029/* Pops a context of the context stack. If we're at the bottom, lexes
3030 the next logical line. Returns 1 if we're at the end of the
3031 argument list to the # operator, or if it is illegal to "overflow"
3032 into the rest of the file (e.g. 6.10.3.1.1). */
3033static int
3034do_pop_context (pfile)
3035 cpp_reader *pfile;
3036{
3037 cpp_context *context;
3fef5b2b 3038
041c3194
ZW
3039 if (pfile->cur_context == 0)
3040 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3041
3042 /* Argument contexts, when parsing args or handling # operator
3043 return CPP_EOF at the end. */
3044 context = CURRENT_CONTEXT (pfile);
3045 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3046 return 1;
3047
3048 /* Free resources when leaving macro contexts. */
3049 if (context->args)
3050 free_macro_args (context->args);
3051
3052 if (pfile->cur_context == pfile->no_expand_level)
3053 pfile->no_expand_level--;
3054 pfile->cur_context--;
3055
3056 return 0;
3057}
3058
3059/* Move down the context stack, and return the next raw token. */
3060static const cpp_token *
3061pop_context (pfile)
3062 cpp_reader *pfile;
3063{
3064 if (do_pop_context (pfile))
3065 return &eof_token;
3066 return get_raw_token (pfile);
3067}
3068
3069/* Turn off macro expansion at the current context level. */
3070static unsigned int
3071prevent_macro_expansion (pfile)
3072 cpp_reader *pfile;
3073{
3074 unsigned int prev_value = pfile->no_expand_level;
3075 pfile->no_expand_level = pfile->cur_context;
3076 return prev_value;
3077}
3078
3079/* Restore macro expansion to its previous state. */
3080static void
3081restore_macro_expansion (pfile, prev_value)
3082 cpp_reader *pfile;
3083 unsigned int prev_value;
3084{
3085 pfile->no_expand_level = prev_value;
3086}
3087
3088/* Used by cpperror.c to obtain the correct line and column to report
3089 in a diagnostic. */
3090unsigned int
3091_cpp_get_line (pfile, pcol)
3092 cpp_reader *pfile;
3093 unsigned int *pcol;
3094{
3095 unsigned int index;
3096 const cpp_token *cur_token;
3097
3098 if (pfile->in_lex_line)
3099 index = pfile->token_list.tokens_used;
3100 else
3101 index = pfile->contexts[0].posn;
3102
3103 cur_token = &pfile->token_list.tokens[index - 1];
3104 if (pcol)
3105 *pcol = cur_token->col;
3106 return cur_token->line;
3107}
3108
3109#define DSC(str) (const U_CHAR *)str, sizeof str - 1
3110static const char * const monthnames[] =
3111{
3112 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3113 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3114};
3115
3116/* Handle builtin macros like __FILE__. */
3117static const cpp_token *
3118special_symbol (pfile, node, token)
3119 cpp_reader *pfile;
3120 cpp_hashnode *node;
d1d9a6bd 3121 const cpp_token *token;
3fef5b2b 3122{
041c3194
ZW
3123 cpp_token *result;
3124 cpp_buffer *ip;
3fef5b2b 3125
041c3194 3126 switch (node->type)
3fef5b2b 3127 {
041c3194
ZW
3128 case T_FILE:
3129 case T_BASE_FILE:
3fef5b2b 3130 {
041c3194 3131 const char *file;
3fef5b2b 3132
041c3194
ZW
3133 ip = CPP_BUFFER (pfile);
3134 if (ip == 0)
3135 file = "";
3fef5b2b 3136 else
041c3194
ZW
3137 {
3138 if (node->type == T_BASE_FILE)
3139 while (CPP_PREV_BUFFER (ip) != NULL)
3140 ip = CPP_PREV_BUFFER (ip);
3141
3142 file = ip->nominal_fname;
3143 }
3144 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3145 strlen (file));
3146 }
3147 break;
3fef5b2b 3148
041c3194
ZW
3149 case T_INCLUDE_LEVEL:
3150 {
3151 int true_indepth = 0;
3152
3153 /* Do not count the primary source file in the include level. */
3154 ip = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
3155 while (ip)
3156 {
3157 true_indepth++;
3158 ip = CPP_PREV_BUFFER (ip);
3159 }
3160 result = alloc_number_token (pfile, true_indepth);
3fef5b2b
NB
3161 }
3162 break;
3163
041c3194
ZW
3164 case T_SPECLINE:
3165 /* If __LINE__ is embedded in a macro, it must expand to the
3166 line of the macro's invocation, not its definition.
3167 Otherwise things like assert() will not work properly. */
3168 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
5d7ee2fa
NB
3169 break;
3170
041c3194 3171 case T_STDC:
3fef5b2b 3172 {
041c3194 3173 int stdc = 1;
3fef5b2b 3174
041c3194
ZW
3175#ifdef STDC_0_IN_SYSTEM_HEADERS
3176 if (CPP_IN_SYSTEM_HEADER (pfile)
3177 && !cpp_defined (pfile, DSC("__STRICT_ANSI__")))
3178 stdc = 0;
3179#endif
3180 result = alloc_number_token (pfile, stdc);
3fef5b2b
NB
3181 }
3182 break;
3183
041c3194
ZW
3184 case T_DATE:
3185 case T_TIME:
3186 if (pfile->date == 0)
3187 {
3188 /* Allocate __DATE__ and __TIME__ from permanent storage,
3189 and save them in pfile so we don't have to do this again.
3190 We don't generate these strings at init time because
3191 time() and localtime() are very slow on some systems. */
3192 time_t tt = time (NULL);
3193 struct tm *tb = localtime (&tt);
3194
3195 pfile->date = make_string_token
3196 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3197 pfile->time = make_string_token
3198 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3199
3200 sprintf ((char *) pfile->date->val.name.text, "%s %2d %4d",
3201 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3202 sprintf ((char *) pfile->time->val.name.text, "%02d:%02d:%02d",
3203 tb->tm_hour, tb->tm_min, tb->tm_sec);
3204 }
3205 result = node->type == T_DATE ? pfile->date: pfile->time;
3fef5b2b
NB
3206 break;
3207
041c3194
ZW
3208 case T_POISON:
3209 cpp_error (pfile, "attempt to use poisoned \"%s\".", node->name);
3210 return token;
3211
3212 default:
3213 cpp_ice (pfile, "invalid special hash type");
3214 return token;
3fef5b2b
NB
3215 }
3216
041c3194
ZW
3217 ASSIGN_FLAGS_AND_POS (result, token);
3218 return result;
3219}
3220#undef DSC
3221
3222/* Dump the original user's spelling of argument index ARG_NO to the
3223 macro whose expansion is LIST. */
3224static void
3225dump_param_spelling (pfile, list, arg_no)
3226 cpp_reader *pfile;
3227 const cpp_toklist *list;
3228 unsigned int arg_no;
3229{
3230 const U_CHAR *param = list->namebuf;
3231
3232 while (arg_no--)
3233 param += ustrlen (param) + 1;
3234 CPP_PUTS (pfile, param, ustrlen (param));
3fef5b2b
NB
3235}
3236
041c3194 3237/* Dump a token list to the output. */
c5a04734 3238void
041c3194
ZW
3239_cpp_dump_list (pfile, list, token, flush)
3240 cpp_reader *pfile;
3241 const cpp_toklist *list;
3242 const cpp_token *token;
3243 int flush;
c5a04734 3244{
041c3194
ZW
3245 const cpp_token *limit = list->tokens + list->tokens_used;
3246 const cpp_token *prev = 0;
c5a04734 3247
041c3194
ZW
3248 /* Avoid the CPP_EOF. */
3249 if (list->directive)
3250 limit--;
c5a04734 3251
041c3194 3252 while (token < limit)
c5a04734 3253 {
041c3194
ZW
3254 if (token->type == CPP_MACRO_ARG)
3255 {
3256 if (token->flags & PREV_WHITE)
3257 CPP_PUTC (pfile, ' ');
3258 if (token->flags & STRINGIFY_ARG)
3259 CPP_PUTC (pfile, '#');
3260 dump_param_spelling (pfile, list, token->val.aux);
3261 }
c5a04734 3262 else
041c3194
ZW
3263 output_token (pfile, token, prev);
3264 if (token->flags & PASTE_LEFT)
3265 CPP_PUTS (pfile, " ##", 3);
3266 prev = token;
3267 token++;
c5a04734 3268 }
041c3194
ZW
3269
3270 if (flush && pfile->printer)
3271 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
c5a04734
ZW
3272}
3273
041c3194
ZW
3274/* Stub function during conversion, mainly for cppexp.c's benefit. */
3275enum cpp_ttype
3276_cpp_get_directive_token (pfile)
c5a04734 3277 cpp_reader *pfile;
c5a04734 3278{
041c3194 3279 const cpp_token *tok;
c5a04734 3280
041c3194
ZW
3281 if (pfile->no_macro_expand)
3282 tok = _cpp_get_raw_token (pfile);
3283 else
3284 tok = cpp_get_token (pfile);
3285
3286 if (tok->type == CPP_EOF)
3287 return CPP_VSPACE; /* backward compat; and don't try to spell EOF */
3288
3289 CPP_RESERVE (pfile, TOKEN_LEN (tok));
3290 pfile->limit = spell_token (pfile, tok, pfile->limit);
3291 return tok->type;
3292}
3293
3294/* Allocate pfile->input_buffer, and initialize trigraph_map[]
3295 if it hasn't happened already. */
3296
3297void
3298_cpp_init_input_buffer (pfile)
3299 cpp_reader *pfile;
3300{
3301 init_trigraph_map ();
3302 pfile->context_cap = 20;
3303 pfile->contexts = (cpp_context *)
3304 xmalloc (pfile->context_cap * sizeof (cpp_context));
3305 pfile->cur_context = 0;
3306 pfile->contexts[0].u.list = &pfile->token_list;
3307
3308 pfile->contexts[0].posn = 0;
3309 pfile->contexts[0].count = 0;
3310 pfile->no_expand_level = UINT_MAX;
3311
3312 _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3313}
3314
3315/* Moves to the end of the directive line, popping contexts as
3316 necessary. */
3317void
3318_cpp_skip_rest_of_line (pfile)
3319 cpp_reader *pfile;
3320{
3321 /* Get to base context. Clear parsing args and each contexts flags,
3322 since these can cause pop_context to return without popping. */
3323 pfile->no_expand_level = UINT_MAX;
3324 while (pfile->cur_context != 0)
c5a04734 3325 {
041c3194
ZW
3326 pfile->contexts[pfile->cur_context].flags = 0;
3327 do_pop_context (pfile);
c5a04734 3328 }
041c3194
ZW
3329
3330 pfile->contexts[pfile->cur_context].count = 0;
3331 pfile->contexts[pfile->cur_context].posn = 0;
3332 pfile->token_list.directive = 0;
c5a04734
ZW
3333}
3334
041c3194
ZW
3335/* Directive handler wrapper used by the command line option
3336 processor. */
3337void
3338_cpp_run_directive (pfile, dir, buf, count)
3339 cpp_reader *pfile;
3340 const struct directive *dir;
3341 const char *buf;
3342 size_t count;
3343{
3344 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3345 {
3346 unsigned int prev_lvl = 0;
3347 /* scan the line now, else prevent_macro_expansion won't work */
3348 do_pop_context (pfile);
3349 if (! (dir->flags & EXPAND))
3350 prev_lvl = prevent_macro_expansion (pfile);
3351
3352 (void) (*dir->handler) (pfile);
3353
3354 if (! (dir->flags & EXPAND))
3355 restore_macro_expansion (pfile, prev_lvl);
3356
3357 _cpp_skip_rest_of_line (pfile);
3358 cpp_pop_buffer (pfile);
3359 }
3360}
This page took 0.508554 seconds and 5 git commands to generate.