]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
cpphash.c (struct macro_info): Add new members.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
041c3194
ZW
23/*
24
25Cleanups to do:-
26
041c3194
ZW
27o -dM and with _cpp_dump_list: too many \n output.
28o Put a printer object in cpp_reader?
29o Check line numbers assigned to all errors.
30o Replace strncmp with memcmp almost everywhere.
31o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
041c3194
ZW
32o Convert do_ functions to return void. Kaveh thinks it's OK; and said he'll
33 give it a run when we've got some code.
041c3194
ZW
34o Distinguish integers, floats, and 'other' pp-numbers.
35o Store ints and char constants as binary values.
36o New command-line assertion syntax.
041c3194
ZW
37o Work towards functions in cpperror.c taking a message level parameter.
38 If we do this, merge the common code of do_warning and do_error.
39o Comment all functions, and describe macro expansion algorithm.
40o Move as much out of header files as possible.
41o Remove single quote pairs `', and some '', from diagnostics.
42o Correct pastability test for CPP_NAME and CPP_NUMBER.
43
44*/
45
45b966db
ZW
46#include "config.h"
47#include "system.h"
48#include "intl.h"
49#include "cpplib.h"
50#include "cpphash.h"
041c3194 51#include "symcat.h"
45b966db 52
f9a0e96c
ZW
53static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
54static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
55
56/* Flags for cpp_context. */
57#define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
58#define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
59#define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
60#define CONTEXT_ARG (1 << 3) /* If an argument context. */
61
62typedef struct cpp_context cpp_context;
63struct cpp_context
64{
65 union
66 {
67 const cpp_toklist *list; /* Used for macro contexts only. */
68 const cpp_token **arg; /* Used for arg contexts only. */
69 } u;
70
71 /* Pushed token to be returned by next call to get_raw_token. */
72 const cpp_token *pushed_token;
73
74 struct macro_args *args; /* 0 for arguments and object-like macros. */
75 unsigned short posn; /* Current posn, index into u. */
76 unsigned short count; /* No. of tokens in u. */
77 unsigned short level;
78 unsigned char flags;
79};
80
81typedef struct macro_args macro_args;
82struct macro_args
83{
84 unsigned int *ends;
85 const cpp_token **tokens;
86 unsigned int capacity;
87 unsigned int used;
88 unsigned short level;
89};
90
91static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
92static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
93 macro_args *, unsigned int *));
94static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
95static void save_token PARAMS ((macro_args *, const cpp_token *));
96static int pop_context PARAMS ((cpp_reader *));
97static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
98static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
99static void free_macro_args PARAMS ((macro_args *));
100
041c3194
ZW
101#define auto_expand_name_space(list) \
102 _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
f2d5f0cc
ZW
103static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
104 size_t, FILE *));
041c3194 105static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
1368ee70 106 unsigned int));
041c3194 107static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
9e62c811 108 unsigned int));
f2d5f0cc 109
041c3194
ZW
110static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
111static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
b8f41010
NB
112 unsigned char *));
113static const unsigned char *backslash_start PARAMS ((cpp_reader *,
114 const unsigned char *));
041c3194
ZW
115static int skip_block_comment PARAMS ((cpp_reader *));
116static int skip_line_comment PARAMS ((cpp_reader *));
b8f41010 117static void skip_whitespace PARAMS ((cpp_reader *, int));
bfb9dc7f
ZW
118static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
119 const U_CHAR *, const U_CHAR *));
120static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
041c3194
ZW
121static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
122 unsigned int));
b8f41010 123static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
f624ffa7
NB
124static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
125 const unsigned char *,
ad265aa4 126 unsigned int, unsigned int));
041c3194
ZW
127static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
128static int lex_next PARAMS ((cpp_reader *, int));
129static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
130 const cpp_token *));
b8f41010 131
041c3194
ZW
132static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
133static void expand_context_stack PARAMS ((cpp_reader *));
d1d9a6bd 134static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
041c3194
ZW
135 unsigned char *));
136static void output_token PARAMS ((cpp_reader *, const cpp_token *,
137 const cpp_token *));
b8f41010
NB
138typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
139 cpp_token *));
041c3194
ZW
140static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
141 unsigned int));
142static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
143static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
144 const cpp_token *));
145static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
146static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
147 const cpp_token *));
148static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
149 const cpp_token *, int *));
150static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
151static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
152static cpp_token *get_temp_token PARAMS ((cpp_reader *));
153static void release_temp_tokens PARAMS ((cpp_reader *));
154static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
155static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
b8f41010 156
bfb9dc7f
ZW
157#define INIT_TOKEN_STR(list, token) \
158 do {(token)->val.str.len = 0; \
159 (token)->val.str.text = (list)->namebuf + (list)->name_used; \
d1d9a6bd 160 } while (0)
b8f41010 161
041c3194
ZW
162#define VALID_SIGN(c, prevc) \
163 (((c) == '+' || (c) == '-') && \
164 ((prevc) == 'e' || (prevc) == 'E' \
165 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
166
b8f41010
NB
/* Handle LF, CR, CR-LF and LF-CR style newlines.  C is the newline
   character that has just been consumed and CUR points at the
   character following it.  If that character is the other member of
   the CR/LF pair ('\r' + '\n' - C), it is skipped as well so that a
   two-character newline is counted once.  Assumes the next
   character, if any, is in the buffer.

   The macro uses the variable `pfile' from the expansion site: it
   increments pfile->buffer->lineno, records CUR as the new
   pfile->buffer->line_base and resets pfile->col_adjust to zero.

   C is parenthesized in the expansion so that an argument which is
   itself an expression (e.g. a conditional) is subtracted as a whole
   rather than being re-associated with the surrounding operators.  */

#define handle_newline(cur, limit, c) \
  do { \
    if ((cur) < (limit) && *(cur) == '\r' + '\n' - (c)) \
      (cur)++; \
    pfile->buffer->lineno++; \
    pfile->buffer->line_base = (cur); \
    pfile->col_adjust = 0; \
  } while (0)
b8f41010 178
041c3194 179#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
b8f41010
NB
180#define PREV_TOKEN_TYPE (cur_token[-1].type)
181
f9a0e96c
ZW
182#define PUSH_TOKEN(ttype) cur_token++->type = (ttype)
183#define REVISE_TOKEN(ttype) cur_token[-1].type = (ttype)
184#define BACKUP_TOKEN(ttype) (--cur_token)->type = (ttype)
f617b8e2
NB
185#define BACKUP_DIGRAPH(ttype) do { \
186 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
187
188/* An upper bound on the number of bytes needed to spell a token,
189 including preceding whitespace. */
bfb9dc7f
ZW
190#define TOKEN_LEN(token) (5 + (TOKEN_SPELL(token) == SPELL_STRING \
191 ? (token)->val.str.len \
192 : (TOKEN_SPELL(token) == SPELL_IDENT \
193 ? (token)->val.node->length \
194 : 0)))
f617b8e2 195
f9a0e96c
ZW
196#define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
197#define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
198
199#define ASSIGN_FLAGS_AND_POS(d, s) \
200 do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
201 if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
202 } while (0)
203
204/* f is flags, just consisting of PREV_WHITE | BOL. */
205#define MODIFY_FLAGS_AND_POS(d, s, f) \
206 do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
207 if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
208 } while (0)
209
f617b8e2 210#define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
5d7ee2fa
NB
211#define I(e, s) {SPELL_IDENT, s},
212#define S(e, s) {SPELL_STRING, s},
b8f41010
NB
213#define C(e, s) {SPELL_CHAR, s},
214#define N(e, s) {SPELL_NONE, s},
b8f41010 215
041c3194
ZW
216const struct token_spelling
217token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
218
219#undef T
220#undef I
221#undef S
222#undef C
223#undef N
224
225/* For debugging: the internal names of the tokens. */
b449f23a
KG
226#define T(e, s) U STRINGX(e),
227#define I(e, s) U STRINGX(e),
228#define S(e, s) U STRINGX(e),
229#define C(e, s) U STRINGX(e),
230#define N(e, s) U STRINGX(e),
041c3194 231
cf00a885 232const U_CHAR *const token_names[N_TTYPES] = { TTYPE_TABLE };
b8f41010
NB
233
234#undef T
5d7ee2fa
NB
235#undef I
236#undef S
b8f41010
NB
237#undef C
238#undef N
b8f41010 239
041c3194
ZW
240/* The following table is used by trigraph_ok/trigraph_replace. If we
241 have designated initializers, it can be constant data; otherwise,
242 it is set up at runtime by _cpp_init_input_buffer. */
243
244#if (GCC_VERSION >= 2007)
245#define init_trigraph_map() /* nothing */
246#define TRIGRAPH_MAP \
247__extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
248#define END };
249#define s(p, v) [p] = v,
250#else
251#define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
252 static void init_trigraph_map PARAMS ((void)) { \
253 unsigned char *x = trigraph_map;
254#define END }
255#define s(p, v) x[p] = v;
256#endif
257
258TRIGRAPH_MAP
259 s('=', '#') s(')', ']') s('!', '|')
260 s('(', '[') s('\'', '^') s('>', '}')
261 s('/', '\\') s('<', '{') s('-', '~')
262END
263
264#undef TRIGRAPH_MAP
265#undef END
266#undef s
267
45b966db
ZW
268/* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
269
270void
271_cpp_grow_token_buffer (pfile, n)
272 cpp_reader *pfile;
273 long n;
274{
275 long old_written = CPP_WRITTEN (pfile);
276 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
277 pfile->token_buffer = (U_CHAR *)
278 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
279 CPP_SET_WRITTEN (pfile, old_written);
280}
281
f2d5f0cc
ZW
282/* Deal with the annoying semantics of fwrite. */
283static void
284safe_fwrite (pfile, buf, len, fp)
285 cpp_reader *pfile;
286 const U_CHAR *buf;
287 size_t len;
288 FILE *fp;
289{
290 size_t count;
45b966db 291
f2d5f0cc
ZW
292 while (len)
293 {
294 count = fwrite (buf, 1, len, fp);
295 if (count == 0)
296 goto error;
297 len -= count;
298 buf += count;
299 }
300 return;
301
302 error:
303 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
304}
305
306/* Notify the compiler proper that the current line number has jumped,
307 or the current file name has changed. */
308
309static void
1368ee70 310output_line_command (pfile, print, line)
45b966db 311 cpp_reader *pfile;
f2d5f0cc 312 cpp_printer *print;
1368ee70 313 unsigned int line;
45b966db 314{
041c3194 315 cpp_buffer *ip = CPP_BUFFER (pfile);
f2d5f0cc
ZW
316 enum { same = 0, enter, leave, rname } change;
317 static const char * const codes[] = { "", " 1", " 2", "" };
318
041c3194
ZW
319 if (line == 0)
320 return;
321
322 /* End the previous line of text. */
323 if (pfile->need_newline)
324 putc ('\n', print->outf);
325 pfile->need_newline = 0;
326
f2d5f0cc
ZW
327 if (CPP_OPTION (pfile, no_line_commands))
328 return;
329
041c3194
ZW
330 /* If ip is null, we've been called from cpp_finish, and they just
331 needed the final flush and trailing newline. */
332 if (!ip)
333 return;
334
fb753f88 335 if (pfile->include_depth == print->last_id)
54bef41d 336 {
fb753f88
JJ
337 /* Determine whether the current filename has changed, and if so,
338 how. 'nominal_fname' values are unique, so they can be compared
339 by comparing pointers. */
340 if (ip->nominal_fname == print->last_fname)
341 change = same;
342 else
0e500c78 343 change = rname;
fb753f88
JJ
344 }
345 else
346 {
347 if (pfile->include_depth > print->last_id)
348 change = enter;
f2d5f0cc 349 else
fb753f88
JJ
350 change = leave;
351 print->last_id = pfile->include_depth;
45b966db 352 }
fb753f88
JJ
353 print->last_fname = ip->nominal_fname;
354
f2d5f0cc
ZW
355 /* If the current file has not changed, we can output a few newlines
356 instead if we want to increase the line number by a small amount.
357 We cannot do this if print->lineno is zero, because that means we
358 haven't output any line commands yet. (The very first line
359 command output is a `same_file' command.) */
041c3194 360 if (change == same && print->lineno > 0
f2d5f0cc 361 && line >= print->lineno && line < print->lineno + 8)
45b966db 362 {
f2d5f0cc 363 while (line > print->lineno)
45b966db 364 {
f2d5f0cc
ZW
365 putc ('\n', print->outf);
366 print->lineno++;
45b966db 367 }
f2d5f0cc 368 return;
45b966db 369 }
f2d5f0cc
ZW
370
371#ifndef NO_IMPLICIT_EXTERN_C
372 if (CPP_OPTION (pfile, cplusplus))
373 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
374 codes[change],
c31a6508
ZW
375 ip->inc->sysp ? " 3" : "",
376 (ip->inc->sysp == 2) ? " 4" : "");
f2d5f0cc
ZW
377 else
378#endif
379 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
380 codes[change],
c31a6508 381 ip->inc->sysp ? " 3" : "");
f2d5f0cc
ZW
382 print->lineno = line;
383}
384
385/* Write the contents of the token_buffer to the output stream, and
386 clear the token_buffer. Also handles generating line commands and
387 keeping track of file transitions. */
388
389void
041c3194 390cpp_output_tokens (pfile, print, line)
f2d5f0cc
ZW
391 cpp_reader *pfile;
392 cpp_printer *print;
041c3194 393 unsigned int line;
f2d5f0cc 394{
f6fab919
ZW
395 if (CPP_WRITTEN (pfile) - print->written)
396 {
f6fab919
ZW
397 safe_fwrite (pfile, pfile->token_buffer,
398 CPP_WRITTEN (pfile) - print->written, print->outf);
041c3194
ZW
399 pfile->need_newline = 1;
400 if (print->lineno)
401 print->lineno++;
45b966db 402
041c3194 403 CPP_SET_WRITTEN (pfile, print->written);
f2d5f0cc 404 }
041c3194 405 output_line_command (pfile, print, line);
45b966db
ZW
406}
407
c56c2073 408/* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
f2d5f0cc
ZW
409
410void
411cpp_scan_buffer_nooutput (pfile)
412 cpp_reader *pfile;
413{
f2d5f0cc 414 unsigned int old_written = CPP_WRITTEN (pfile);
041c3194
ZW
415 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
416
f2d5f0cc
ZW
417 for (;;)
418 {
041c3194
ZW
419 /* In no-output mode, we can ignore everything but directives. */
420 const cpp_token *token = cpp_get_token (pfile);
421 if (token->type == CPP_EOF)
422 {
423 cpp_pop_buffer (pfile);
424 if (CPP_BUFFER (pfile) == stop)
425 break;
426 }
427 _cpp_skip_rest_of_line (pfile);
f2d5f0cc
ZW
428 }
429 CPP_SET_WRITTEN (pfile, old_written);
430}
431
c56c2073 432/* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
f2d5f0cc
ZW
433void
434cpp_scan_buffer (pfile, print)
435 cpp_reader *pfile;
436 cpp_printer *print;
437{
c56c2073 438 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
041c3194 439 const cpp_token *token, *prev = 0;
f2d5f0cc
ZW
440
441 for (;;)
442 {
443 token = cpp_get_token (pfile);
041c3194 444 if (token->type == CPP_EOF)
f2d5f0cc 445 {
041c3194
ZW
446 cpp_pop_buffer (pfile);
447 if (CPP_BUFFER (pfile) == stop)
f2d5f0cc 448 return;
041c3194
ZW
449 cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
450 prev = 0;
451 continue;
452 }
453
454 if (token->flags & BOL)
455 {
456 cpp_output_tokens (pfile, print, pfile->token_list.line);
457 prev = 0;
f2d5f0cc 458 }
041c3194
ZW
459
460 output_token (pfile, token, prev);
461 prev = token;
f2d5f0cc
ZW
462 }
463}
464
f9a0e96c
ZW
465/* Scan a single line of the input into the token_buffer. */
466void
467cpp_scan_line (pfile)
468 cpp_reader *pfile;
469{
470 const cpp_token *token, *prev = 0;
471
472 do
473 {
474 token = cpp_get_token (pfile);
475 if (token->type == CPP_EOF)
476 {
477 cpp_pop_buffer (pfile);
478 break;
479 }
480
481 output_token (pfile, token, prev);
482 prev = token;
483 }
484 while (pfile->cur_context > 0
485 || pfile->contexts[0].posn < pfile->contexts[0].count);
486}
487
041c3194
ZW
488/* Helper routine used by parse_include, which can't see spell_token.
489 Reinterpret the current line as an h-char-sequence (< ... >); we are
490 looking at the first token after the <. */
491const cpp_token *
492_cpp_glue_header_name (pfile)
45b966db
ZW
493 cpp_reader *pfile;
494{
041c3194
ZW
495 unsigned int written = CPP_WRITTEN (pfile);
496 const cpp_token *t;
497 cpp_token *hdr;
498 U_CHAR *buf;
499 size_t len;
500
501 for (;;)
502 {
417f3e3a 503 t = _cpp_get_token (pfile);
041c3194
ZW
504 if (t->type == CPP_GREATER || t->type == CPP_EOF)
505 break;
506
507 CPP_RESERVE (pfile, TOKEN_LEN (t));
508 if (t->flags & PREV_WHITE)
509 CPP_PUTC_Q (pfile, ' ');
510 pfile->limit = spell_token (pfile, t, pfile->limit);
511 }
512
513 if (t->type == CPP_EOF)
514 cpp_error (pfile, "missing terminating > character");
45b966db 515
041c3194
ZW
516 len = CPP_WRITTEN (pfile) - written;
517 buf = xmalloc (len);
518 memcpy (buf, pfile->token_buffer + written, len);
519 CPP_SET_WRITTEN (pfile, written);
520
521 hdr = get_temp_token (pfile);
522 hdr->type = CPP_HEADER_NAME;
523 hdr->flags = 0;
bfb9dc7f
ZW
524 hdr->val.str.text = buf;
525 hdr->val.str.len = len;
041c3194 526 return hdr;
45b966db
ZW
527}
528
1368ee70
ZW
529/* Token-buffer helper functions. */
530
d1d9a6bd
NB
531/* Expand a token list's string space. It is *vital* that
532 list->tokens_used is correct, to get pointer fix-up right. */
041c3194
ZW
533void
534_cpp_expand_name_space (list, len)
1368ee70 535 cpp_toklist *list;
c5a04734
ZW
536 unsigned int len;
537{
f617b8e2 538 const U_CHAR *old_namebuf;
f617b8e2
NB
539
540 old_namebuf = list->namebuf;
c5a04734
ZW
541 list->name_cap += len;
542 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
f617b8e2
NB
543
544 /* Fix up token text pointers. */
79f50f2a 545 if (list->namebuf != old_namebuf)
f617b8e2
NB
546 {
547 unsigned int i;
548
549 for (i = 0; i < list->tokens_used; i++)
bfb9dc7f
ZW
550 if (token_spellings[list->tokens[i].type].type == SPELL_STRING)
551 list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
f617b8e2 552 }
1368ee70
ZW
553}
554
041c3194
ZW
555/* If there is not enough room for LEN more characters, expand the
556 list by just enough to have room for LEN characters. */
557void
558_cpp_reserve_name_space (list, len)
559 cpp_toklist *list;
560 unsigned int len;
561{
562 unsigned int room = list->name_cap - list->name_used;
563
564 if (room < len)
565 _cpp_expand_name_space (list, len - room);
566}
567
1368ee70 568/* Expand the number of tokens in a list. */
d1d9a6bd
NB
569void
570_cpp_expand_token_space (list, count)
1368ee70 571 cpp_toklist *list;
d1d9a6bd 572 unsigned int count;
1368ee70 573{
d1d9a6bd
NB
574 unsigned int n;
575
576 list->tokens_cap += count;
577 n = list->tokens_cap;
15dad1d9 578 if (list->flags & LIST_OFFSET)
d1d9a6bd 579 list->tokens--, n++;
1368ee70 580 list->tokens = (cpp_token *)
d1d9a6bd 581 xrealloc (list->tokens, n * sizeof (cpp_token));
15dad1d9
ZW
582 if (list->flags & LIST_OFFSET)
583 list->tokens++; /* Skip the dummy. */
1368ee70
ZW
584}
585
d1d9a6bd
NB
586/* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
587 an extra token in front of the token list, as this allows the lexer
588 to always peek at the previous token without worrying about
589 underflowing the list, and some initial space. Otherwise, no
590 token- or name-space is allocated, and there is no dummy token. */
15dad1d9 591void
d1d9a6bd 592_cpp_init_toklist (list, flags)
1368ee70 593 cpp_toklist *list;
d1d9a6bd 594 int flags;
1368ee70 595{
d1d9a6bd
NB
596 if (flags == NO_DUMMY_TOKEN)
597 {
598 list->tokens_cap = 0;
041c3194 599 list->tokens = 0;
d1d9a6bd 600 list->name_cap = 0;
041c3194 601 list->namebuf = 0;
d1d9a6bd
NB
602 list->flags = 0;
603 }
604 else
605 {
606 /* Initialize token space. Put a dummy token before the start
607 that will fail matches. */
608 list->tokens_cap = 256; /* 4K's worth. */
609 list->tokens = (cpp_token *)
610 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
611 list->tokens[0].type = CPP_EOF;
612 list->tokens++;
613
614 /* Initialize name space. */
615 list->name_cap = 1024;
041c3194 616 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
d1d9a6bd
NB
617 list->flags = LIST_OFFSET;
618 }
15dad1d9 619
15dad1d9
ZW
620 _cpp_clear_toklist (list);
621}
1368ee70 622
15dad1d9
ZW
623/* Clear a token list. */
624void
625_cpp_clear_toklist (list)
626 cpp_toklist *list;
627{
c5a04734
ZW
628 list->tokens_used = 0;
629 list->name_used = 0;
041c3194
ZW
630 list->directive = 0;
631 list->paramc = 0;
632 list->params_len = 0;
15dad1d9
ZW
633 list->flags &= LIST_OFFSET; /* clear all but that one */
634}
635
636/* Free a token list. Does not free the list itself, which may be
637 embedded in a larger structure. */
638void
639_cpp_free_toklist (list)
041c3194 640 const cpp_toklist *list;
15dad1d9 641{
15dad1d9
ZW
642 if (list->flags & LIST_OFFSET)
643 free (list->tokens - 1); /* Backup over dummy token. */
644 else
645 free (list->tokens);
646 free (list->namebuf);
1368ee70
ZW
647}
648
15dad1d9
ZW
649/* Compare two tokens. */
650int
651_cpp_equiv_tokens (a, b)
652 const cpp_token *a, *b;
653{
041c3194
ZW
654 if (a->type == b->type && a->flags == b->flags)
655 switch (token_spellings[a->type].type)
656 {
657 default: /* Keep compiler happy. */
658 case SPELL_OPERATOR:
659 return 1;
660 case SPELL_CHAR:
661 case SPELL_NONE:
662 return a->val.aux == b->val.aux; /* arg_no or character. */
663 case SPELL_IDENT:
bfb9dc7f 664 return a->val.node == b->val.node;
041c3194 665 case SPELL_STRING:
bfb9dc7f
ZW
666 return (a->val.str.len == b->val.str.len
667 && !memcmp (a->val.str.text, b->val.str.text,
668 a->val.str.len));
041c3194 669 }
15dad1d9 670
041c3194 671 return 0;
15dad1d9
ZW
672}
673
674/* Compare two token lists. */
675int
676_cpp_equiv_toklists (a, b)
677 const cpp_toklist *a, *b;
678{
679 unsigned int i;
680
041c3194
ZW
681 if (a->tokens_used != b->tokens_used
682 || a->flags != b->flags
683 || a->paramc != b->paramc)
15dad1d9
ZW
684 return 0;
685
686 for (i = 0; i < a->tokens_used; i++)
687 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
688 return 0;
689 return 1;
690}
691
041c3194 692/* Utility routine:
9e62c811 693
bfb9dc7f
ZW
694 Compares, the token TOKEN to the NUL-terminated string STRING.
695 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
15dad1d9 696
041c3194 697int
bfb9dc7f
ZW
698cpp_ideq (token, string)
699 const cpp_token *token;
041c3194
ZW
700 const char *string;
701{
bfb9dc7f 702 if (token->type != CPP_NAME)
041c3194 703 return 0;
bfb9dc7f
ZW
704
705 return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
15dad1d9 706}
1368ee70 707
041c3194 708/* Lexing algorithm.
45b966db 709
041c3194
ZW
710 The original lexer in cpplib was made up of two passes: a first pass
711 that replaced trigraphs and deleted escaped newlines, and a second
712 pass that tokenized the result of the first pass. Tokenisation was
713 performed by peeking at the next character in the input stream. For
714 example, if the input stream contained "!=", the handler for the !
715 character would peek at the next character, and if it were a '='
716 would skip over it, and return a "!=" token, otherwise it would
717 return just the "!" token.
61474454 718
041c3194
ZW
719 To implement a single-pass lexer, this peeking ahead is unworkable.
720 An arbitrary number of escaped newlines, and trigraphs (in particular
721 ??/ which translates to the escape \), could separate the '!' and '='
722 in the input stream, yet the next token is still a "!=".
61474454 723
041c3194
ZW
724 Suppose instead that we lex by one logical line at a time, producing
725 a token list or stack for each logical line, and when seeing the '!'
726 push a CPP_NOT token on the list. Then if the '!' is part of a
727 longer token ("!=") we know we must see the remainder of the token by
728 the time we reach the end of the logical line. Thus we can have the
729 '=' handler look at the previous token (at the end of the list / top
730 of the stack) and see if it is a "!" token, and if so, instead of
731 pushing a "=" token revise the existing token to be a "!=" token.
61474454 732
041c3194
ZW
733 This works in the presence of escaped newlines, because the '\' would
734 have been pushed on the top of the stack as a CPP_BACKSLASH. The
735 newline ('\n' or '\r') handler looks at the token at the top of the
736 stack to see if it is a CPP_BACKSLASH, and if so discards both.
737 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
738 the '=' handler would never see any intervening escaped newlines.
45b966db 739
041c3194
ZW
740 To make trigraphs work in this context, as in precedence trigraphs
741 are highest and converted before anything else, the '?' handler does
742 lookahead to see if it is a trigraph, and if so skips the trigraph
743 and pushes the token it represents onto the top of the stack. This
744 also works in the particular case of a CPP_BACKSLASH trigraph.
45b966db 745
041c3194
ZW
746 To the preprocessor, whitespace is only significant to the point of
747 knowing whether whitespace precedes a particular token. For example,
748 the '=' handler needs to know whether there was whitespace between it
749 and a "!" token on the top of the stack, to make the token conversion
750 decision correctly. So each token has a PREV_WHITE flag to
751 indicate this - the standard permits consecutive whitespace to be
752 regarded as a single space. The compiler front ends are not
753 interested in whitespace at all; they just require a token stream.
754 Another place where whitespace is significant to the preprocessor is
755 a #define statement - if there is whitespace between the macro name
756 and an initial "(" token the macro is "object-like", otherwise it is
757 a function-like macro that takes arguments.
758
759 However, all is not rosy. Parsing of identifiers, numbers, comments
760 and strings becomes trickier because of the possibility of raw
761 trigraphs and escaped newlines in the input stream.
762
763 The trigraphs are three consecutive characters beginning with two
764 question marks. A question mark is not valid as part of a number or
765 identifier, so parsing of a number or identifier terminates normally
766 upon reaching it, returning to the mainloop which handles the
767 trigraph just like it would in any other position. Similarly for the
768 backslash of a backslash-newline combination. So we just need the
769 escaped-newline dropper in the mainloop to check if the token on the
770 top of the stack after dropping the escaped newline is a number or
771 identifier, and if so to continue the processing it as if nothing had
772 happened.
773
774 For strings, we replace trigraphs whenever we reach a quote or
775 newline, because there might be a backslash trigraph escaping them.
776 We need to be careful that we start trigraph replacing from where we
777 left off previously, because it is possible for a first scan to leave
778 "fake" trigraphs that a second scan would pick up as real (e.g. the
779 sequence "????/\n=" would find a fake ??= trigraph after removing the
780 escaped newline.)
781
782 For line comments, on reaching a newline we scan the previous
783 character(s) to see if it is escaped, and continue if it is. Block
784 comments ignore everything and just focus on finding the comment
785 termination mark. The only difficult thing, and it is surprisingly
786 tricky, is checking if an asterisk precedes the final slash since
787 they could be separated by escaped newlines. If the preprocessor is
788 invoked with the output comments option, we don't bother removing
789 escaped newlines and replacing trigraphs for output.
790
791 Finally, numbers can begin with a period, which is pushed initially
792 as a CPP_DOT token in its own right. The digit handler checks if the
793 previous token was a CPP_DOT not separated by whitespace, and if so
794 pops it off the stack and pushes a period into the number's buffer
795 before calling the number parser.
45b966db 796
041c3194
ZW
797*/
798
799static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
800 U":>", U"<%", U"%>"};
45b966db 801
041c3194
ZW
802/* Call when a trigraph is encountered. It warns if necessary, and
803 returns true if the trigraph should be honoured. END is the third
804 character of a trigraph in the input stream. */
45b966db 805static int
041c3194 806trigraph_ok (pfile, end)
45b966db 807 cpp_reader *pfile;
041c3194 808 const unsigned char *end;
45b966db 809{
041c3194
ZW
810 int accept = CPP_OPTION (pfile, trigraphs);
811
812 if (CPP_OPTION (pfile, warn_trigraphs))
45b966db 813 {
041c3194
ZW
814 unsigned int col = end - 1 - pfile->buffer->line_base;
815 if (accept)
816 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
817 "trigraph ??%c converted to %c",
818 (int) *end, (int) trigraph_map[*end]);
45b966db 819 else
041c3194
ZW
820 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
821 "trigraph ??%c ignored", (int) *end);
45b966db 822 }
041c3194 823 return accept;
45b966db
ZW
824}
825
041c3194
ZW
826/* Scan a string for trigraphs, warning or replacing them inline as
827 appropriate. When parsing a string, we must call this routine
828 before processing a newline character (if trigraphs are enabled),
829 since the newline might be escaped by a preceding backslash
830 trigraph sequence. Returns a pointer to the end of the name after
831 replacement. */
832
833static unsigned char *
834trigraph_replace (pfile, src, limit)
45b966db 835 cpp_reader *pfile;
041c3194
ZW
836 unsigned char *src;
837 unsigned char *limit;
45b966db 838{
041c3194
ZW
839 unsigned char *dest;
840
841 /* Starting with src[1], find two consecutive '?'. The case of no
842 trigraphs is streamlined. */
843
043afb2a 844 for (src++; src + 1 < limit; src += 2)
041c3194
ZW
845 {
846 if (src[0] != '?')
847 continue;
848
849 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
850 if (src[-1] == '?')
851 src--;
852 else if (src + 2 == limit || src[1] != '?')
853 continue;
45b966db 854
041c3194
ZW
855 /* Check if it really is a trigraph. */
856 if (trigraph_map[src[2]] == 0)
857 continue;
45b966db 858
041c3194
ZW
859 dest = src;
860 goto trigraph_found;
861 }
862 return limit;
45b966db 863
041c3194
ZW
864 /* Now we have a trigraph, we need to scan the remaining buffer, and
865 copy-shifting its contents left if replacement is enabled. */
866 for (; src + 2 < limit; dest++, src++)
867 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
868 {
869 trigraph_found:
870 src += 2;
871 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
872 *dest = trigraph_map[*src];
873 }
874
875 /* Copy remaining (at most 2) characters. */
876 while (src < limit)
877 *dest++ = *src++;
878 return dest;
45b966db
ZW
879}
880
041c3194
ZW
881/* If CUR is a backslash or the end of a trigraphed backslash, return
882 a pointer to its beginning, otherwise NULL. We don't read beyond
883 the buffer start, because there is the start of the comment in the
884 buffer. */
885static const unsigned char *
886backslash_start (pfile, cur)
64aaf407 887 cpp_reader *pfile;
041c3194 888 const unsigned char *cur;
64aaf407 889{
041c3194
ZW
890 if (cur[0] == '\\')
891 return cur;
892 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
893 && trigraph_ok (pfile, cur))
894 return cur - 2;
895 return 0;
64aaf407
NB
896}
897
041c3194
ZW
898/* Skip a C-style block comment. This is probably the trickiest
899 handler. We find the end of the comment by seeing if an asterisk
900 is before every '/' we encounter. The nasty complication is that a
901 previous asterisk may be separated by one or more escaped newlines.
902 Returns non-zero if comment terminated by EOF, zero otherwise. */
903static int
904skip_block_comment (pfile)
45b966db
ZW
905 cpp_reader *pfile;
906{
041c3194
ZW
907 cpp_buffer *buffer = pfile->buffer;
908 const unsigned char *char_after_star = 0;
909 register const unsigned char *cur = buffer->cur;
910 int seen_eof = 0;
911
912 /* Inner loop would think the comment has ended if the first comment
913 character is a '/'. Avoid this and keep the inner loop clean by
914 skipping such a character. */
915 if (cur < buffer->rlimit && cur[0] == '/')
916 cur++;
64aaf407 917
041c3194 918 for (; cur < buffer->rlimit; )
45b966db 919 {
041c3194
ZW
920 unsigned char c = *cur++;
921
922 /* People like decorating comments with '*', so check for
923 '/' instead for efficiency. */
924 if (c == '/')
45b966db 925 {
041c3194
ZW
926 if (cur[-2] == '*' || cur - 1 == char_after_star)
927 goto out;
928
929 /* Warn about potential nested comments, but not when
930 the final character inside the comment is a '/'.
931 Don't bother to get it right across escaped newlines. */
932 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
933 && cur[0] == '*' && cur[1] != '/')
45b966db 934 {
041c3194
ZW
935 buffer->cur = cur;
936 cpp_warning (pfile, "'/*' within comment");
45b966db 937 }
45b966db 938 }
91fcd158 939 else if (is_vspace (c))
45b966db 940 {
041c3194
ZW
941 const unsigned char* bslash = backslash_start (pfile, cur - 2);
942
943 handle_newline (cur, buffer->rlimit, c);
944 /* Work correctly if there is an asterisk before an
945 arbirtrarily long sequence of escaped newlines. */
946 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
947 char_after_star = cur;
948 else
949 char_after_star = 0;
45b966db 950 }
45b966db 951 }
041c3194
ZW
952 seen_eof = 1;
953
64aaf407 954 out:
041c3194
ZW
955 buffer->cur = cur;
956 return seen_eof;
45b966db
ZW
957}
958
f9a0e96c
ZW
959/* Skip a C++ line comment. Handles escaped newlines. Returns
960 non-zero if a multiline comment. */
041c3194
ZW
961static int
962skip_line_comment (pfile)
45b966db
ZW
963 cpp_reader *pfile;
964{
041c3194
ZW
965 cpp_buffer *buffer = pfile->buffer;
966 register const unsigned char *cur = buffer->cur;
967 int multiline = 0;
968
969 for (; cur < buffer->rlimit; )
970 {
971 unsigned char c = *cur++;
972
91fcd158 973 if (is_vspace (c))
45b966db 974 {
041c3194
ZW
975 /* Check for a (trigaph?) backslash escaping the newline. */
976 if (!backslash_start (pfile, cur - 2))
977 goto out;
978 multiline = 1;
979 handle_newline (cur, buffer->rlimit, c);
980 }
981 }
982 cur++;
45b966db 983
041c3194
ZW
984 out:
985 buffer->cur = cur - 1; /* Leave newline for caller. */
986 return multiline;
987}
45b966db 988
041c3194
ZW
989/* Skips whitespace, stopping at next non-whitespace character.
990 Adjusts pfile->col_adjust to account for tabs. This enables tokens
991 to be assigned the correct column. */
992static void
993skip_whitespace (pfile, in_directive)
994 cpp_reader *pfile;
995 int in_directive;
996{
997 cpp_buffer *buffer = pfile->buffer;
91fcd158 998 unsigned short warned = 0;
45b966db 999
91fcd158
NB
1000 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
1001 while (buffer->cur < buffer->rlimit)
041c3194 1002 {
91fcd158 1003 unsigned char c = *buffer->cur;
45b966db 1004
91fcd158
NB
1005 if (!is_nvspace (c))
1006 break;
1007
1008 buffer->cur++;
1009 /* Horizontal space always OK. */
1010 if (c == ' ')
1011 continue;
1012 else if (c == '\t')
1013 pfile->col_adjust += CPP_OPTION (pfile, tabstop) - 1
1014 - (CPP_BUF_COL (buffer) - 1) % CPP_OPTION(pfile, tabstop);
1015 /* Must be \f \v or \0. */
1016 else if (c == '\0')
041c3194 1017 {
91fcd158
NB
1018 if (!warned)
1019 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
1020 CPP_BUF_COL (buffer),
1021 "embedded null character ignored");
1022 warned = 1;
45b966db 1023 }
041c3194 1024 else if (in_directive && CPP_PEDANTIC (pfile))
91fcd158
NB
1025 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1026 CPP_BUF_COL (buffer),
1027 "%s in preprocessing directive",
1028 c == '\f' ? "form feed" : "vertical tab");
45b966db 1029 }
041c3194 1030}
45b966db 1031
041c3194 1032/* Parse (append) an identifier. */
417f3e3a 1033static const U_CHAR *
bfb9dc7f 1034parse_name (pfile, tok, cur, rlimit)
45b966db 1035 cpp_reader *pfile;
bfb9dc7f
ZW
1036 cpp_token *tok;
1037 const U_CHAR *cur, *rlimit;
45b966db 1038{
bfb9dc7f
ZW
1039 const U_CHAR *name = cur;
1040 unsigned int len;
041c3194 1041
bfb9dc7f 1042 while (cur < rlimit)
041c3194 1043 {
bfb9dc7f
ZW
1044 if (! is_idchar (*cur))
1045 break;
e5ec2402
ZW
1046 /* $ is not a legal identifier character in the standard, but is
1047 commonly accepted as an extension. Don't warn about it in
1048 skipped conditional blocks. */
bfb9dc7f 1049 if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
041c3194 1050 {
bfb9dc7f 1051 CPP_BUFFER (pfile)->cur = cur;
041c3194
ZW
1052 cpp_pedwarn (pfile, "'$' character in identifier");
1053 }
bfb9dc7f 1054 cur++;
041c3194 1055 }
bfb9dc7f 1056 len = cur - name;
45b966db 1057
bfb9dc7f 1058 if (tok->val.node)
041c3194 1059 {
bfb9dc7f
ZW
1060 unsigned int oldlen = tok->val.node->length;
1061 U_CHAR *newname = alloca (oldlen + len);
1062 memcpy (newname, tok->val.node->name, oldlen);
1063 memcpy (newname + oldlen, name, len);
1064 len += oldlen;
1065 name = newname;
041c3194
ZW
1066 }
1067
bfb9dc7f
ZW
1068 tok->val.node = cpp_lookup (pfile, name, len);
1069 return cur;
45b966db
ZW
1070}
1071
041c3194 1072/* Parse (append) a number. */
45b966db 1073static void
041c3194 1074parse_number (pfile, list, name)
45b966db 1075 cpp_reader *pfile;
041c3194 1076 cpp_toklist *list;
bfb9dc7f 1077 cpp_string *name;
45b966db 1078{
041c3194
ZW
1079 const unsigned char *name_limit;
1080 unsigned char *namebuf;
1081 cpp_buffer *buffer = pfile->buffer;
1082 register const unsigned char *cur = buffer->cur;
45b966db 1083
041c3194
ZW
1084 expanded:
1085 name_limit = list->namebuf + list->name_cap;
1086 namebuf = list->namebuf + list->name_used;
45b966db 1087
041c3194
ZW
1088 for (; cur < buffer->rlimit && namebuf < name_limit; )
1089 {
1090 unsigned char c = *namebuf = *cur; /* Copy a single char. */
45b966db 1091
041c3194
ZW
1092 /* Perhaps we should accept '$' here if we accept it for
1093 identifiers. We know namebuf[-1] is safe, because for c to
1094 be a sign we must have pushed at least one character. */
1095 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1096 goto out;
45b966db 1097
041c3194
ZW
1098 namebuf++;
1099 cur++;
45b966db 1100 }
64aaf407 1101
041c3194
ZW
1102 /* Run out of name space? */
1103 if (cur < buffer->rlimit)
1104 {
1105 list->name_used = namebuf - list->namebuf;
1106 auto_expand_name_space (list);
1107 goto expanded;
1108 }
1109
64aaf407 1110 out:
041c3194
ZW
1111 buffer->cur = cur;
1112 name->len = namebuf - name->text;
1113 list->name_used = namebuf - list->namebuf;
45b966db
ZW
1114}
1115
041c3194 1116/* Places a string terminated by an unescaped TERMINATOR into a
bfb9dc7f 1117 cpp_string, which should be expandable and thus at the top of the
041c3194
ZW
1118 list's stack. Handles embedded trigraphs, if necessary, and
1119 escaped newlines.
45b966db 1120
041c3194
ZW
1121 Can be used for character constants (terminator = '\''), string
1122 constants ('"') and angled headers ('>'). Multi-line strings are
1123 allowed, except for within directives. */
45b966db 1124
041c3194
ZW
1125static void
1126parse_string (pfile, list, token, terminator)
45b966db 1127 cpp_reader *pfile;
041c3194
ZW
1128 cpp_toklist *list;
1129 cpp_token *token;
1130 unsigned int terminator;
45b966db 1131{
041c3194 1132 cpp_buffer *buffer = pfile->buffer;
bfb9dc7f 1133 cpp_string *name = &token->val.str;
041c3194
ZW
1134 register const unsigned char *cur = buffer->cur;
1135 const unsigned char *name_limit;
1136 unsigned char *namebuf;
1137 unsigned int null_count = 0;
1138 unsigned int trigraphed = list->name_used;
45b966db 1139
041c3194
ZW
1140 expanded:
1141 name_limit = list->namebuf + list->name_cap;
1142 namebuf = list->namebuf + list->name_used;
f2d5f0cc 1143
041c3194 1144 for (; cur < buffer->rlimit && namebuf < name_limit; )
45b966db 1145 {
041c3194 1146 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
45b966db 1147
041c3194
ZW
1148 if (c == '\0')
1149 null_count++;
91fcd158 1150 else if (c == terminator || is_vspace (c))
45b966db 1151 {
041c3194
ZW
1152 /* Needed for trigraph_replace and multiline string warning. */
1153 buffer->cur = cur;
5eec0563 1154
041c3194
ZW
1155 /* Scan for trigraphs before checking if backslash-escaped. */
1156 if ((CPP_OPTION (pfile, trigraphs)
1157 || CPP_OPTION (pfile, warn_trigraphs))
1158 && namebuf - (list->namebuf + trigraphed) >= 3)
45b966db 1159 {
041c3194
ZW
1160 namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1161 namebuf);
1162 /* The test above guarantees trigraphed will be positive. */
1163 trigraphed = namebuf - list->namebuf - 2;
45b966db 1164 }
45b966db 1165
041c3194 1166 namebuf--; /* Drop the newline / terminator from the name. */
91fcd158 1167 if (is_vspace (c))
45b966db 1168 {
041c3194
ZW
1169 /* Drop a backslash newline, and continue. */
1170 if (namebuf[-1] == '\\')
45b966db 1171 {
041c3194
ZW
1172 handle_newline (cur, buffer->rlimit, c);
1173 namebuf--;
1174 continue;
45b966db 1175 }
45b966db 1176
041c3194 1177 cur--;
45b966db 1178
f9a0e96c
ZW
1179 /* In assembly language, silently terminate strings of
1180 either variety at end of line. This is a kludge
1181 around not knowing where comments are. */
1182 if (CPP_OPTION (pfile, lang_asm))
041c3194 1183 goto out;
64aaf407 1184
f9a0e96c
ZW
1185 /* Character constants and header names may not extend
1186 over multiple lines. In Standard C, neither may
1187 strings. We accept multiline strings as an
041c3194 1188 extension. (Even in directives - otherwise, glibc's
f9a0e96c 1189 longlong.h breaks.) */
041c3194
ZW
1190 if (terminator != '"')
1191 goto unterminated;
1192
1193 cur++; /* Move forwards again. */
45b966db 1194
041c3194
ZW
1195 if (pfile->multiline_string_line == 0)
1196 {
1197 pfile->multiline_string_line = token->line;
1198 pfile->multiline_string_column = token->col;
1199 if (CPP_PEDANTIC (pfile))
1200 cpp_pedwarn (pfile, "multi-line string constant");
1201 }
1202
1203 *namebuf++ = '\n';
1204 handle_newline (cur, buffer->rlimit, c);
45b966db
ZW
1205 }
1206 else
1207 {
041c3194 1208 unsigned char *temp;
45b966db 1209
041c3194
ZW
1210 /* An odd number of consecutive backslashes represents
1211 an escaped terminator. */
1212 temp = namebuf - 1;
1213 while (temp >= name->text && *temp == '\\')
1214 temp--;
45b966db 1215
041c3194
ZW
1216 if ((namebuf - temp) & 1)
1217 goto out;
1218 namebuf++;
1219 }
45b966db 1220 }
ff2b53ef 1221 }
45b966db 1222
041c3194
ZW
1223 /* Run out of name space? */
1224 if (cur < buffer->rlimit)
45b966db 1225 {
041c3194
ZW
1226 list->name_used = namebuf - list->namebuf;
1227 auto_expand_name_space (list);
1228 goto expanded;
1229 }
45b966db 1230
041c3194
ZW
1231 /* We may not have trigraph-replaced the input for this code path,
1232 but as the input is in error by being unterminated we don't
1233 bother. Prevent warnings about no newlines at EOF. */
91fcd158 1234 if (is_vspace (cur[-1]))
041c3194 1235 cur--;
45b966db 1236
041c3194
ZW
1237 unterminated:
1238 cpp_error (pfile, "missing terminating %c character", (int) terminator);
476f2869 1239
041c3194
ZW
1240 if (terminator == '\"' && pfile->multiline_string_line != list->line
1241 && pfile->multiline_string_line != 0)
1242 {
1243 cpp_error_with_line (pfile, pfile->multiline_string_line,
1244 pfile->multiline_string_column,
1245 "possible start of unterminated string literal");
1246 pfile->multiline_string_line = 0;
45b966db 1247 }
041c3194
ZW
1248
1249 out:
1250 buffer->cur = cur;
1251 name->len = namebuf - name->text;
1252 list->name_used = namebuf - list->namebuf;
45b966db 1253
041c3194
ZW
1254 if (null_count > 0)
1255 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1256 : "null character preserved"));
45b966db
ZW
1257}
1258
041c3194 1259/* The character TYPE helps us distinguish comment types: '*' = C
f9a0e96c
ZW
1260 style, '/' = C++ style. For code simplicity, the stored comment
1261 includes the comment start and any terminator. */
041c3194
ZW
1262
1263#define COMMENT_START_LEN 2
9e62c811 1264static void
041c3194
ZW
1265save_comment (list, token, from, len, type)
1266 cpp_toklist *list;
1267 cpp_token *token;
1268 const unsigned char *from;
9e62c811 1269 unsigned int len;
041c3194 1270 unsigned int type;
9e62c811 1271{
041c3194
ZW
1272 unsigned char *buffer;
1273
1274 len += COMMENT_START_LEN;
9e62c811 1275
041c3194
ZW
1276 if (list->name_used + len > list->name_cap)
1277 _cpp_expand_name_space (list, len);
9e62c811 1278
bfb9dc7f 1279 INIT_TOKEN_STR (list, token);
041c3194 1280 token->type = CPP_COMMENT;
bfb9dc7f 1281 token->val.str.len = len;
45b966db 1282
041c3194
ZW
1283 buffer = list->namebuf + list->name_used;
1284 list->name_used += len;
45b966db 1285
041c3194
ZW
1286 /* Copy the comment. */
1287 if (type == '*')
45b966db 1288 {
041c3194
ZW
1289 *buffer++ = '/';
1290 *buffer++ = '*';
45b966db 1291 }
041c3194 1292 else
ea4a453b 1293 {
041c3194
ZW
1294 *buffer++ = type;
1295 *buffer++ = type;
ea4a453b 1296 }
041c3194 1297 memcpy (buffer, from, len - COMMENT_START_LEN);
45b966db
ZW
1298}
1299
041c3194
ZW
1300/*
1301 * The tokenizer's main loop. Returns a token list, representing a
1302 * logical line in the input file. On EOF after some tokens have
1303 * been processed, we return immediately. Then in next call, or if
1304 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1305 * token is placed in the list.
1306 *
1307 * Implementation relies almost entirely on lookback, rather than
1308 * looking forwards. This means that tokenization requires just
1309 * a single pass of the file, even in the presence of trigraphs and
1310 * escaped newlines, providing significant performance benefits.
1311 * Trigraph overhead is negligible if they are disabled, and low
1312 * even when enabled.
1313 */
1314
91fcd158 1315#define KNOWN_DIRECTIVE() (list->directive != 0)
041c3194
ZW
1316#define MIGHT_BE_DIRECTIVE() \
1317(cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
45b966db 1318
041c3194
ZW
1319static void
1320lex_line (pfile, list)
45b966db 1321 cpp_reader *pfile;
041c3194 1322 cpp_toklist *list;
45b966db 1323{
041c3194
ZW
1324 cpp_token *cur_token, *token_limit, *first;
1325 cpp_buffer *buffer = pfile->buffer;
1326 const unsigned char *cur = buffer->cur;
1327 unsigned char flags = 0;
1328 unsigned int first_token = list->tokens_used;
45b966db 1329
041c3194
ZW
1330 if (!(list->flags & LIST_OFFSET))
1331 (abort) ();
1332
1333 list->file = buffer->nominal_fname;
1334 list->line = CPP_BUF_LINE (buffer);
1335 pfile->col_adjust = 0;
1336 pfile->in_lex_line = 1;
1337 if (cur == buffer->buf)
1338 list->flags |= BEG_OF_FILE;
1339
1340 expanded:
1341 token_limit = list->tokens + list->tokens_cap;
1342 cur_token = list->tokens + list->tokens_used;
45b966db 1343
041c3194 1344 for (; cur < buffer->rlimit && cur_token < token_limit;)
45b966db 1345 {
041c3194 1346 unsigned char c;
45b966db 1347
91fcd158
NB
1348 /* Optimize non-vertical whitespace skipping; most tokens are
1349 probably separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
1350 c = *cur;
1351 if (is_nvspace (c))
45b966db 1352 {
91fcd158
NB
1353 buffer->cur = cur;
1354 skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1355 && cur_token > &list->tokens[first_token]));
041c3194 1356 cur = buffer->cur;
ff2b53ef 1357
041c3194
ZW
1358 flags = PREV_WHITE;
1359 if (cur == buffer->rlimit)
1360 break;
91fcd158 1361 c = *cur;
45b966db 1362 }
91fcd158 1363 cur++;
45b966db 1364
041c3194
ZW
1365 /* Initialize current token. CPP_EOF will not be fixed up by
1366 expand_name_space. */
1367 list->tokens_used = cur_token - list->tokens + 1;
1368 cur_token->type = CPP_EOF;
1369 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1370 cur_token->line = CPP_BUF_LINE (buffer);
1371 cur_token->flags = flags;
1372 flags = 0;
46d07497 1373
041c3194
ZW
1374 switch (c)
1375 {
1376 case '0': case '1': case '2': case '3': case '4':
1377 case '5': case '6': case '7': case '8': case '9':
1378 {
1379 int prev_dot;
46d07497 1380
041c3194
ZW
1381 cur--; /* Backup character. */
1382 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1383 if (prev_dot)
1384 cur_token--;
bfb9dc7f 1385 INIT_TOKEN_STR (list, cur_token);
041c3194
ZW
1386 /* Prepend an immediately previous CPP_DOT token. */
1387 if (prev_dot)
1388 {
1389 if (list->name_cap == list->name_used)
1390 auto_expand_name_space (list);
46d07497 1391
bfb9dc7f 1392 cur_token->val.str.len = 1;
041c3194
ZW
1393 list->namebuf[list->name_used++] = '.';
1394 }
46d07497 1395
041c3194
ZW
1396 continue_number:
1397 cur_token->type = CPP_NUMBER; /* Before parse_number. */
1398 buffer->cur = cur;
bfb9dc7f 1399 parse_number (pfile, list, &cur_token->val.str);
041c3194
ZW
1400 cur = buffer->cur;
1401 }
1402 /* Check for # 123 form of #line. */
1403 if (MIGHT_BE_DIRECTIVE ())
1404 list->directive = _cpp_check_linemarker (pfile, cur_token,
1405 !(cur_token[-1].flags
1406 & PREV_WHITE));
1407 cur_token++;
1408 break;
46d07497 1409
041c3194
ZW
1410 letter:
1411 case '_':
1412 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1413 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1414 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1415 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1416 case 'y': case 'z':
1417 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1418 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1419 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1420 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1421 case 'Y': case 'Z':
1422 cur--; /* Backup character. */
bfb9dc7f 1423 cur_token->val.node = 0;
041c3194
ZW
1424 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
1425
1426 continue_name:
bfb9dc7f 1427 cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
45b966db 1428
041c3194
ZW
1429 if (MIGHT_BE_DIRECTIVE ())
1430 list->directive = _cpp_check_directive (pfile, cur_token,
1431 !(list->tokens[0].flags
1432 & PREV_WHITE));
1433 cur_token++;
1434 break;
f8f769ea 1435
041c3194 1436 case '\'':
041c3194 1437 case '\"':
041c3194
ZW
1438 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1439 /* Do we have a wide string? */
1440 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
f9a0e96c
ZW
1441 && cur_token[-1].val.node == pfile->spec_nodes->n_L)
1442 BACKUP_TOKEN (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
45b966db 1443
041c3194
ZW
1444 do_parse_string:
1445 /* Here c is one of ' " or >. */
bfb9dc7f 1446 INIT_TOKEN_STR (list, cur_token);
041c3194
ZW
1447 buffer->cur = cur;
1448 parse_string (pfile, list, cur_token, c);
1449 cur = buffer->cur;
1450 cur_token++;
1451 break;
45b966db 1452
041c3194
ZW
1453 case '/':
1454 cur_token->type = CPP_DIV;
1455 if (IMMED_TOKEN ())
1456 {
1457 if (PREV_TOKEN_TYPE == CPP_DIV)
1458 {
1459 /* We silently allow C++ comments in system headers,
1460 irrespective of conformance mode, because lots of
1461 broken systems do that and trying to clean it up
1462 in fixincludes is a nightmare. */
1463 if (CPP_IN_SYSTEM_HEADER (pfile))
1464 goto do_line_comment;
1465 else if (CPP_OPTION (pfile, cplusplus_comments))
1466 {
1467 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1468 && ! buffer->warned_cplusplus_comments)
1469 {
1470 buffer->cur = cur;
1471 cpp_pedwarn (pfile,
1472 "C++ style comments are not allowed in ISO C89");
1473 cpp_pedwarn (pfile,
1474 "(this will be reported only once per input file)");
1475 buffer->warned_cplusplus_comments = 1;
1476 }
1477 do_line_comment:
1478 buffer->cur = cur;
1479#if 0 /* Leave until new lexer in place. */
1480 if (cur[-2] != c)
1481 cpp_warning (pfile,
1482 "comment start split across lines");
1483#endif
1484 if (skip_line_comment (pfile))
1485 cpp_warning (pfile, "multi-line comment");
f8f769ea 1486
041c3194
ZW
1487 /* Back-up to first '-' or '/'. */
1488 cur_token--;
1489 if (!CPP_OPTION (pfile, discard_comments)
91fcd158 1490 && (!KNOWN_DIRECTIVE()
041c3194
ZW
1491 || (list->directive->flags & COMMENTS)))
1492 save_comment (list, cur_token++, cur,
1493 buffer->cur - cur, c);
f9a0e96c 1494 else
041c3194
ZW
1495 flags = PREV_WHITE;
1496
1497 cur = buffer->cur;
1498 break;
1499 }
1500 }
1501 }
1502 cur_token++;
1503 break;
1504
1505 case '*':
1506 cur_token->type = CPP_MULT;
1507 if (IMMED_TOKEN ())
45b966db 1508 {
041c3194
ZW
1509 if (PREV_TOKEN_TYPE == CPP_DIV)
1510 {
1511 buffer->cur = cur;
1512#if 0 /* Leave until new lexer in place. */
1513 if (cur[-2] != '/')
1514 cpp_warning (pfile,
1515 "comment start '/*' split across lines");
1516#endif
1517 if (skip_block_comment (pfile))
1518 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1519 "unterminated comment");
1520#if 0 /* Leave until new lexer in place. */
1521 else if (buffer->cur[-2] != '*')
1522 cpp_warning (pfile,
1523 "comment end '*/' split across lines");
1524#endif
1525 /* Back up to opening '/'. */
1526 cur_token--;
1527 if (!CPP_OPTION (pfile, discard_comments)
91fcd158 1528 && (!KNOWN_DIRECTIVE()
041c3194
ZW
1529 || (list->directive->flags & COMMENTS)))
1530 save_comment (list, cur_token++, cur,
1531 buffer->cur - cur, c);
f9a0e96c 1532 else
041c3194
ZW
1533 flags = PREV_WHITE;
1534
1535 cur = buffer->cur;
1536 break;
1537 }
1538 else if (CPP_OPTION (pfile, cplusplus))
1539 {
1540 /* In C++, there are .* and ->* operators. */
1541 if (PREV_TOKEN_TYPE == CPP_DEREF)
1542 BACKUP_TOKEN (CPP_DEREF_STAR);
1543 else if (PREV_TOKEN_TYPE == CPP_DOT)
1544 BACKUP_TOKEN (CPP_DOT_STAR);
1545 }
45b966db 1546 }
041c3194 1547 cur_token++;
f8f769ea 1548 break;
45b966db 1549
041c3194
ZW
1550 case '\n':
1551 case '\r':
1552 handle_newline (cur, buffer->rlimit, c);
1553 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
45b966db 1554 {
041c3194 1555 if (IMMED_TOKEN ())
45b966db 1556 {
041c3194
ZW
1557 /* Remove the escaped newline. Then continue to process
1558 any interrupted name or number. */
1559 cur_token--;
1560 /* Backslash-newline may not be immediately followed by
1561 EOF (C99 5.1.1.2). */
1562 if (cur >= buffer->rlimit)
1563 {
1564 cpp_pedwarn (pfile, "backslash-newline at end of file");
1565 break;
1566 }
1567 if (IMMED_TOKEN ())
1568 {
1569 cur_token--;
1570 if (cur_token->type == CPP_NAME)
1571 goto continue_name;
1572 else if (cur_token->type == CPP_NUMBER)
1573 goto continue_number;
1574 cur_token++;
1575 }
1576 /* Remember whitespace setting. */
1577 flags = cur_token->flags;
f8f769ea 1578 break;
45b966db 1579 }
041c3194
ZW
1580 else
1581 {
1582 buffer->cur = cur;
1583 cpp_warning (pfile,
1584 "backslash and newline separated by space");
1585 }
1586 }
1587 else if (MIGHT_BE_DIRECTIVE ())
1588 {
1589 /* "Null directive." C99 6.10.7: A preprocessing
1590 directive of the form # <new-line> has no effect.
1591
1592 But it is still a directive, and therefore disappears
1593 from the output. */
1594 cur_token--;
f9a0e96c
ZW
1595 if (cur_token->flags & PREV_WHITE
1596 && CPP_WTRADITIONAL (pfile))
1597 cpp_warning (pfile, "K+R C ignores #\\n with the # indented");
f8f769ea 1598 }
45b966db 1599
041c3194
ZW
1600 /* Skip vertical space until we have at least one token to
1601 return. */
1602 if (cur_token != &list->tokens[first_token])
1603 goto out;
1604 list->line = CPP_BUF_LINE (buffer);
f8f769ea 1605 break;
04e3ec78 1606
041c3194
ZW
1607 case '-':
1608 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
f9a0e96c 1609 REVISE_TOKEN (CPP_MINUS_MINUS);
041c3194
ZW
1610 else
1611 PUSH_TOKEN (CPP_MINUS);
1612 break;
45b966db 1613
041c3194
ZW
1614 make_hash:
1615 case '#':
1616 /* The digraph flag checking ensures that ## and %:%:
1617 are interpreted as CPP_PASTE, but #%: and %:# are not. */
1618 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1619 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1620 REVISE_TOKEN (CPP_PASTE);
1621 else
1622 PUSH_TOKEN (CPP_HASH);
1623 break;
04e3ec78 1624
041c3194
ZW
1625 case ':':
1626 cur_token->type = CPP_COLON;
1627 if (IMMED_TOKEN ())
1628 {
1629 if (PREV_TOKEN_TYPE == CPP_COLON
1630 && CPP_OPTION (pfile, cplusplus))
1631 BACKUP_TOKEN (CPP_SCOPE);
9b55f29a 1632 else if (CPP_OPTION (pfile, digraphs))
041c3194 1633 {
9b55f29a
NB
1634 /* Digraph: "<:" is a '[' */
1635 if (PREV_TOKEN_TYPE == CPP_LESS)
1636 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1637 /* Digraph: "%:" is a '#' */
1638 else if (PREV_TOKEN_TYPE == CPP_MOD)
1639 {
1640 (--cur_token)->flags |= DIGRAPH;
1641 goto make_hash;
1642 }
041c3194
ZW
1643 }
1644 }
1645 cur_token++;
1646 break;
f8f769ea 1647
041c3194
ZW
1648 case '&':
1649 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1650 REVISE_TOKEN (CPP_AND_AND);
1651 else
1652 PUSH_TOKEN (CPP_AND);
f8f769ea 1653 break;
45b966db 1654
041c3194
ZW
1655 make_or:
1656 case '|':
1657 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1658 REVISE_TOKEN (CPP_OR_OR);
1659 else
1660 PUSH_TOKEN (CPP_OR);
1661 break;
45b966db 1662
041c3194
ZW
1663 case '+':
1664 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1665 REVISE_TOKEN (CPP_PLUS_PLUS);
1666 else
1667 PUSH_TOKEN (CPP_PLUS);
1668 break;
45b966db 1669
041c3194
ZW
1670 case '=':
1671 /* This relies on equidistance of "?=" and "?" tokens. */
1672 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1673 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1674 else
1675 PUSH_TOKEN (CPP_EQ);
1676 break;
45b966db 1677
041c3194
ZW
1678 case '>':
1679 cur_token->type = CPP_GREATER;
1680 if (IMMED_TOKEN ())
1681 {
1682 if (PREV_TOKEN_TYPE == CPP_GREATER)
1683 BACKUP_TOKEN (CPP_RSHIFT);
1684 else if (PREV_TOKEN_TYPE == CPP_MINUS)
1685 BACKUP_TOKEN (CPP_DEREF);
9b55f29a
NB
1686 else if (CPP_OPTION (pfile, digraphs))
1687 {
1688 /* Digraph: ":>" is a ']' */
1689 if (PREV_TOKEN_TYPE == CPP_COLON)
1690 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1691 /* Digraph: "%>" is a '}' */
1692 else if (PREV_TOKEN_TYPE == CPP_MOD)
1693 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1694 }
041c3194
ZW
1695 }
1696 cur_token++;
1697 break;
1698
1699 case '<':
1700 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1701 {
1702 REVISE_TOKEN (CPP_LSHIFT);
1703 break;
1704 }
1705 /* Is this the beginning of a header name? */
91fcd158 1706 if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
041c3194
ZW
1707 {
1708 c = '>'; /* Terminator. */
1709 cur_token->type = CPP_HEADER_NAME;
1710 goto do_parse_string;
1711 }
1712 PUSH_TOKEN (CPP_LESS);
1713 break;
45b966db 1714
041c3194
ZW
1715 case '%':
1716 /* Digraph: "<%" is a '{' */
1717 cur_token->type = CPP_MOD;
9b55f29a
NB
1718 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1719 && CPP_OPTION (pfile, digraphs))
041c3194
ZW
1720 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1721 cur_token++;
1722 break;
04e3ec78 1723
041c3194
ZW
1724 case '?':
1725 if (cur + 1 < buffer->rlimit && *cur == '?'
1726 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1727 {
1728 /* Handle trigraph. */
1729 cur++;
1730 switch (*cur++)
1731 {
1732 case '(': goto make_open_square;
1733 case ')': goto make_close_square;
1734 case '<': goto make_open_brace;
1735 case '>': goto make_close_brace;
1736 case '=': goto make_hash;
1737 case '!': goto make_or;
1738 case '-': goto make_complement;
1739 case '/': goto make_backslash;
1740 case '\'': goto make_xor;
1741 }
1742 }
1743 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1744 {
1745 /* GNU C++ defines <? and >? operators. */
1746 if (PREV_TOKEN_TYPE == CPP_LESS)
1747 {
1748 REVISE_TOKEN (CPP_MIN);
1749 break;
1750 }
1751 else if (PREV_TOKEN_TYPE == CPP_GREATER)
1752 {
1753 REVISE_TOKEN (CPP_MAX);
1754 break;
1755 }
1756 }
1757 PUSH_TOKEN (CPP_QUERY);
1758 break;
45b966db 1759
041c3194
ZW
1760 case '.':
1761 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1762 && IMMED_TOKEN ()
1763 && !(cur_token[-1].flags & PREV_WHITE))
1764 {
1765 cur_token -= 2;
1766 PUSH_TOKEN (CPP_ELLIPSIS);
1767 }
1768 else
1769 PUSH_TOKEN (CPP_DOT);
1770 break;
c5a04734 1771
041c3194
ZW
1772 make_complement:
1773 case '~': PUSH_TOKEN (CPP_COMPL); break;
1774 make_xor:
1775 case '^': PUSH_TOKEN (CPP_XOR); break;
1776 make_open_brace:
1777 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1778 make_close_brace:
1779 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1780 make_open_square:
1781 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1782 make_close_square:
1783 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1784 make_backslash:
1785 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1786 case '!': PUSH_TOKEN (CPP_NOT); break;
1787 case ',': PUSH_TOKEN (CPP_COMMA); break;
1788 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1789 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1790 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
6d2c2047 1791
041c3194
ZW
1792 case '$':
1793 if (CPP_OPTION (pfile, dollars_in_ident))
1794 goto letter;
1795 /* Fall through */
041c3194
ZW
1796 default:
1797 cur_token->val.aux = c;
1798 PUSH_TOKEN (CPP_OTHER);
1799 break;
1800 }
1801 }
6d2c2047 1802
041c3194
ZW
1803 /* Run out of token space? */
1804 if (cur_token == token_limit)
1805 {
1806 list->tokens_used = cur_token - list->tokens;
1807 _cpp_expand_token_space (list, 256);
1808 goto expanded;
1809 }
6d2c2047 1810
041c3194
ZW
1811 cur_token->flags = flags;
1812 if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1813 {
91fcd158 1814 if (cur > buffer->buf && !is_vspace (cur[-1]))
041c3194
ZW
1815 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1816 CPP_BUF_COLUMN (buffer, cur),
1817 "no newline at end of file");
1818 cur_token++->type = CPP_EOF;
1819 }
6d2c2047 1820
041c3194
ZW
1821 out:
1822 /* All tokens are allocated, so the memory location is fixed. */
1823 first = &list->tokens[first_token];
1824
1825 /* Don't complain about the null directive, nor directives in
1826 assembly source: we don't know where the comments are, and # may
1827 introduce assembler pseudo-ops. Don't complain about invalid
1828 directives in skipped conditional groups (6.10 p4). */
1829 if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1830 && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1831 {
1832 if (first[1].type == CPP_NAME)
1833 cpp_error (pfile, "invalid preprocessing directive #%.*s",
bfb9dc7f 1834 (int) first[1].val.node->length, first[1].val.node->name);
041c3194
ZW
1835 else
1836 cpp_error (pfile, "invalid preprocessing directive");
1837 }
1838
91fcd158
NB
1839 /* Put EOF at end of known directives. This covers "directives do
1840 not extend beyond the end of the line (description 6.10 part 2)". */
1841 if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
041c3194
ZW
1842 {
1843 pfile->first_directive_token = first;
1844 cur_token++->type = CPP_EOF;
1845 }
1846
91fcd158
NB
1847 /* Directives, known or not, always start a new line. */
1848 if (first_token == 0 || list->tokens[first_token].type == CPP_HASH)
041c3194 1849 first->flags |= BOL;
6d2c2047 1850 else
041c3194
ZW
1851 /* 6.10.3.10: Within the sequence of preprocessing tokens making
1852 up the invocation of a function-like macro, new line is
1853 considered a normal white-space character. */
1854 first->flags |= PREV_WHITE;
1855
1856 buffer->cur = cur;
1857 list->tokens_used = cur_token - list->tokens;
1858 pfile->in_lex_line = 0;
6d2c2047
ZW
1859}
1860
041c3194
ZW
1861/* Write the spelling of a token TOKEN, with any appropriate
1862 whitespace before it, to the token_buffer. PREV is the previous
1863 token, which is used to determine if we need to shove in an extra
1864 space in order to avoid accidental token paste. */
1865static void
1866output_token (pfile, token, prev)
1867 cpp_reader *pfile;
1868 const cpp_token *token, *prev;
1869{
1870 int dummy;
c5a04734 1871
041c3194
ZW
1872 if (token->col && (token->flags & BOL))
1873 {
1874 /* Supply enough whitespace to put this token in its original
1875 column. Don't bother trying to reconstruct tabs; we can't
1876 get it right in general, and nothing ought to care. (Yes,
1877 some things do care; the fault lies with them.) */
1878 unsigned char *buffer;
1879 unsigned int spaces = token->col - 1;
1880
1881 CPP_RESERVE (pfile, token->col);
1882 buffer = pfile->limit;
1883
1884 while (spaces--)
1885 *buffer++ = ' ';
1886 pfile->limit = buffer;
1887 }
1888 else if (token->flags & PREV_WHITE)
1889 CPP_PUTC (pfile, ' ');
f9a0e96c 1890 else if (prev)
041c3194 1891 {
f9a0e96c 1892 /* Check for and prevent accidental token pasting. */
041c3194
ZW
1893 if (can_paste (pfile, prev, token, &dummy) != CPP_EOF)
1894 CPP_PUTC (pfile, ' ');
f9a0e96c 1895 /* can_paste doesn't catch all the accidental pastes.
041c3194
ZW
1896 Consider a + ++b - if there is not a space between the + and ++, it
1897 will be misparsed as a++ + b. */
1898 else if ((prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1899 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS))
1900 CPP_PUTC (pfile, ' ');
1901 }
d6d5f795 1902
041c3194
ZW
1903 CPP_RESERVE (pfile, TOKEN_LEN (token));
1904 pfile->limit = spell_token (pfile, token, pfile->limit);
1905}
d6d5f795 1906
041c3194 1907/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1908 already contain the enough space to hold the token's spelling.
1909 Returns a pointer to the character after the last character
1910 written. */
d6d5f795 1911
041c3194
ZW
1912static unsigned char *
1913spell_token (pfile, token, buffer)
1914 cpp_reader *pfile; /* Would be nice to be rid of this... */
1915 const cpp_token *token;
1916 unsigned char *buffer;
1917{
1918 switch (token_spellings[token->type].type)
1919 {
1920 case SPELL_OPERATOR:
1921 {
1922 const unsigned char *spelling;
1923 unsigned char c;
d6d5f795 1924
041c3194
ZW
1925 if (token->flags & DIGRAPH)
1926 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1927 else
1928 spelling = token_spellings[token->type].spelling;
1929
1930 while ((c = *spelling++) != '\0')
1931 *buffer++ = c;
1932 }
1933 break;
d6d5f795 1934
041c3194 1935 case SPELL_IDENT:
bfb9dc7f
ZW
1936 memcpy (buffer, token->val.node->name, token->val.node->length);
1937 buffer += token->val.node->length;
041c3194 1938 break;
d6d5f795 1939
041c3194
ZW
1940 case SPELL_STRING:
1941 {
041c3194
ZW
1942 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1943 *buffer++ = 'L';
bfb9dc7f 1944
041c3194 1945 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
bfb9dc7f
ZW
1946 *buffer++ = '"';
1947 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1948 *buffer++ = '\'';
1949
1950 memcpy (buffer, token->val.str.text, token->val.str.len);
1951 buffer += token->val.str.len;
1952
1953 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1954 *buffer++ = '"';
1955 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1956 *buffer++ = '\'';
041c3194
ZW
1957 }
1958 break;
d6d5f795 1959
041c3194
ZW
1960 case SPELL_CHAR:
1961 *buffer++ = token->val.aux;
1962 break;
d6d5f795 1963
041c3194
ZW
1964 case SPELL_NONE:
1965 cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
1966 break;
1967 }
d6d5f795 1968
041c3194
ZW
1969 return buffer;
1970}
d6d5f795 1971
cf00a885
ZW
1972/* Return the spelling of a token known to be an operator.
1973 Does not distinguish digraphs from their counterparts. */
1974const unsigned char *
1975_cpp_spell_operator (type)
1976 enum cpp_ttype type;
1977{
1978 if (token_spellings[type].type == SPELL_OPERATOR)
1979 return token_spellings[type].spelling;
1980 else
1981 return token_names[type];
1982}
1983
1984
041c3194 1985/* Macro expansion algorithm. TODO. */
d6d5f795 1986
041c3194
ZW
1987
1988/* Free the storage allocated for macro arguments. */
1989static void
1990free_macro_args (args)
1991 macro_args *args;
c5a04734 1992{
041c3194 1993 if (args->tokens)
417f3e3a 1994 free ((PTR) args->tokens);
041c3194
ZW
1995 free (args->ends);
1996 free (args);
c5a04734
ZW
1997}
1998
041c3194
ZW
1999/* Determines if a macro has been already used (and is therefore
2000 disabled). */
c5a04734 2001static int
041c3194 2002is_macro_disabled (pfile, expansion, token)
c5a04734 2003 cpp_reader *pfile;
041c3194
ZW
2004 const cpp_toklist *expansion;
2005 const cpp_token *token;
c5a04734 2006{
041c3194
ZW
2007 cpp_context *context = CURRENT_CONTEXT (pfile);
2008
cf00a885
ZW
2009 /* Don't expand anything if this file has already been preprocessed. */
2010 if (CPP_OPTION (pfile, preprocessed))
2011 return 1;
2012
041c3194
ZW
2013 /* Arguments on either side of ## are inserted in place without
2014 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
2015 occurs during a later rescan pass. The effect is that we expand
2016 iff we would as part of the macro's expansion list, so we should
2017 drop to the macro's context. */
2018 if (IS_ARG_CONTEXT (context))
c5a04734 2019 {
041c3194
ZW
2020 if (token->flags & PASTED)
2021 context--;
2022 else if (!(context->flags & CONTEXT_RAW))
2023 return 1;
2024 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2025 context--;
c5a04734 2026 }
c5a04734 2027
041c3194
ZW
2028 /* Have we already used this macro? */
2029 while (context->level > 0)
c5a04734 2030 {
041c3194
ZW
2031 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2032 return 1;
2033 /* Raw argument tokens are judged based on the token list they
2034 came from. */
2035 if (context->flags & CONTEXT_RAW)
2036 context = pfile->contexts + context->level;
2037 else
2038 context--;
2039 }
c5a04734 2040
041c3194
ZW
2041 /* Function-like macros may be disabled if the '(' is not in the
2042 current context. We check this without disrupting the context
2043 stack. */
2044 if (expansion->paramc >= 0)
2045 {
2046 const cpp_token *next;
2047 unsigned int prev_nme;
c5a04734 2048
041c3194
ZW
2049 context = CURRENT_CONTEXT (pfile);
2050 /* Drop down any contexts we're at the end of: the '(' may
2051 appear in lower macro expansions, or in the rest of the file. */
2052 while (context->posn == context->count && context > pfile->contexts)
2053 {
2054 context--;
2055 /* If we matched, we are disabled, as we appear in the
2056 expansion of each macro we meet. */
2057 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2058 return 1;
2059 }
c5a04734 2060
041c3194
ZW
2061 prev_nme = pfile->no_expand_level;
2062 pfile->no_expand_level = context - pfile->contexts;
417f3e3a 2063 next = _cpp_get_token (pfile);
041c3194
ZW
2064 restore_macro_expansion (pfile, prev_nme);
2065 if (next->type != CPP_OPEN_PAREN)
2066 {
2067 _cpp_push_token (pfile, next);
b9bf5af8 2068 if (CPP_WTRADITIONAL (pfile))
041c3194
ZW
2069 cpp_warning (pfile,
2070 "function macro %.*s must be used with arguments in traditional C",
bfb9dc7f 2071 (int) token->val.node->length, token->val.node->name);
041c3194
ZW
2072 return 1;
2073 }
c5a04734 2074 }
c5a04734 2075
041c3194 2076 return 0;
c5a04734
ZW
2077}
2078
041c3194
ZW
2079/* Add a token to the set of tokens forming the arguments to the macro
2080 being parsed in parse_args. */
2081static void
2082save_token (args, token)
2083 macro_args *args;
2084 const cpp_token *token;
c5a04734 2085{
041c3194
ZW
2086 if (args->used == args->capacity)
2087 {
2088 args->capacity += args->capacity + 100;
2089 args->tokens = (const cpp_token **)
417f3e3a
ZW
2090 xrealloc ((PTR) args->tokens,
2091 args->capacity * sizeof (const cpp_token *));
041c3194
ZW
2092 }
2093 args->tokens[args->used++] = token;
c5a04734
ZW
2094}
2095
041c3194
ZW
2096/* Take and save raw tokens until we finish one argument. Empty
2097 arguments are saved as a single CPP_PLACEMARKER token. */
2098static const cpp_token *
2099parse_arg (pfile, var_args, paren_context, args, pcount)
c5a04734 2100 cpp_reader *pfile;
041c3194
ZW
2101 int var_args;
2102 unsigned int paren_context;
2103 macro_args *args;
2104 unsigned int *pcount;
c5a04734 2105{
041c3194
ZW
2106 const cpp_token *token;
2107 unsigned int paren = 0, count = 0;
2108 int raw, was_raw = 1;
c5a04734 2109
041c3194 2110 for (count = 0;; count++)
c5a04734 2111 {
417f3e3a 2112 token = _cpp_get_token (pfile);
c5a04734 2113
041c3194 2114 switch (token->type)
c5a04734 2115 {
041c3194
ZW
2116 default:
2117 break;
c5a04734 2118
041c3194
ZW
2119 case CPP_OPEN_PAREN:
2120 paren++;
2121 break;
2122
2123 case CPP_CLOSE_PAREN:
2124 if (paren-- != 0)
2125 break;
2126 goto out;
2127
2128 case CPP_COMMA:
2129 /* Commas are not terminators within parantheses or var_args. */
2130 if (paren || var_args)
2131 break;
2132 goto out;
2133
2134 case CPP_EOF: /* Error reported by caller. */
2135 goto out;
c5a04734 2136 }
c5a04734 2137
041c3194
ZW
2138 raw = pfile->cur_context <= paren_context;
2139 if (raw != was_raw)
2140 {
2141 was_raw = raw;
2142 save_token (args, 0);
2143 count++;
c5a04734 2144 }
041c3194 2145 save_token (args, token);
c5a04734 2146 }
c5a04734
ZW
2147
2148 out:
041c3194
ZW
2149 if (count == 0)
2150 {
2151 /* Duplicate the placemarker. Then we can set its flags and
2152 position and safely be using more than one. */
2153 save_token (args, duplicate_token (pfile, &placemarker_token));
2154 count++;
2155 }
2156
2157 *pcount = count;
2158 return token;
c5a04734
ZW
2159}
2160
/* Evaluates to nonzero if the argument beginning at offset O of
   argument list A is empty: either the entry at O is a
   CPP_PLACEMARKER token, or it is a null pointer and the entry after
   it is a CPP_PLACEMARKER.  O is evaluated more than once.  */

#define empty_argument(A, O) \
  (((A)->tokens[(O)] != 0 ? (A)->tokens[(O)] : (A)->tokens[(O) + 1])->type \
   == CPP_PLACEMARKER)
2168
2169/* Parse the arguments making up a macro invocation. Nested arguments
2170 are automatically macro expanded, but immediate macros are not
2171 expanded; this enables e.g. operator # to work correctly. Returns
2172 non-zero on error. */
c5a04734 2173static int
041c3194 2174parse_args (pfile, hp, args)
c5a04734 2175 cpp_reader *pfile;
041c3194
ZW
2176 cpp_hashnode *hp;
2177 macro_args *args;
c5a04734 2178{
041c3194
ZW
2179 const cpp_token *token;
2180 const cpp_toklist *macro;
2181 unsigned int total = 0;
2182 unsigned int paren_context = pfile->cur_context;
2183 int argc = 0;
2184
2185 macro = hp->value.expansion;
2186 do
c5a04734 2187 {
041c3194 2188 unsigned int count;
c5a04734 2189
041c3194
ZW
2190 token = parse_arg (pfile, (argc + 1 == macro->paramc
2191 && (macro->flags & VAR_ARGS)),
2192 paren_context, args, &count);
2193 if (argc < macro->paramc)
c5a04734 2194 {
041c3194
ZW
2195 total += count;
2196 args->ends[argc] = total;
c5a04734 2197 }
041c3194 2198 argc++;
c5a04734 2199 }
041c3194 2200 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
c5a04734 2201
041c3194
ZW
2202 if (token->type == CPP_EOF)
2203 {
2204 cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
2205 hp->length, hp->name);
2206 return 1;
2207 }
2208 else if (argc < macro->paramc)
2209 {
2210 /* A rest argument is allowed to not appear in the invocation at all.
2211 e.g. #define debug(format, args...) ...
2212 debug("string");
2213 This is exactly the same as if the rest argument had received no
563dd08a 2214 tokens - debug("string",); This extension is deprecated. */
041c3194 2215
563dd08a 2216 if (argc + 1 == macro->paramc && (macro->flags & GNU_REST_ARGS))
041c3194
ZW
2217 {
2218 /* Duplicate the placemarker. Then we can set its flags and
2219 position and safely be using more than one. */
2220 save_token (args, duplicate_token (pfile, &placemarker_token));
2221 args->ends[argc] = total + 1;
2222 return 0;
2223 }
2224 else
2225 {
2226 cpp_error (pfile,
2227 "insufficient arguments in invocation of macro \"%.*s\"",
2228 hp->length, hp->name);
2229 return 1;
2230 }
2231 }
2232 /* An empty argument to an empty function-like macro is fine. */
2233 else if (argc > macro->paramc
2234 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2235 {
2236 cpp_error (pfile,
2237 "too many arguments in invocation of macro \"%.*s\"",
2238 hp->length, hp->name);
2239 return 1;
2240 }
c5a04734 2241
041c3194
ZW
2242 return 0;
2243}
2244
2245/* Adds backslashes before all backslashes and double quotes appearing
2246 in strings. Non-printable characters are converted to octal. */
2247static U_CHAR *
2248quote_string (dest, src, len)
2249 U_CHAR *dest;
2250 const U_CHAR *src;
2251 unsigned int len;
2252{
2253 while (len--)
c5a04734 2254 {
041c3194 2255 U_CHAR c = *src++;
c5a04734 2256
041c3194 2257 if (c == '\\' || c == '"')
6ab3e7dd 2258 {
041c3194
ZW
2259 *dest++ = '\\';
2260 *dest++ = c;
2261 }
2262 else
2263 {
2264 if (ISPRINT (c))
2265 *dest++ = c;
2266 else
2267 {
2268 sprintf ((char *) dest, "\\%03o", c);
2269 dest += 4;
2270 }
6ab3e7dd 2271 }
c5a04734 2272 }
c5a04734 2273
041c3194 2274 return dest;
c5a04734
ZW
2275}
2276
041c3194
ZW
2277/* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2278 CPP_STRING token containing TEXT in quoted form. */
2279static cpp_token *
2280make_string_token (token, text, len)
2281 cpp_token *token;
2282 const U_CHAR *text;
2283 unsigned int len;
2284{
2285 U_CHAR *buf;
2286
2287 buf = (U_CHAR *) xmalloc (len * 4);
2288 token->type = CPP_STRING;
2289 token->flags = 0;
bfb9dc7f
ZW
2290 token->val.str.text = buf;
2291 token->val.str.len = quote_string (buf, text, len) - buf;
041c3194
ZW
2292 return token;
2293}
2294
2295/* Allocates and converts a temporary token to a CPP_NUMBER token,
2296 evaluating to NUMBER. */
2297static cpp_token *
2298alloc_number_token (pfile, number)
c5a04734 2299 cpp_reader *pfile;
041c3194 2300 int number;
c5a04734 2301{
041c3194
ZW
2302 cpp_token *result;
2303 char *buf;
2304
2305 result = get_temp_token (pfile);
2306 buf = xmalloc (20);
2307 sprintf (buf, "%d", number);
2308
2309 result->type = CPP_NUMBER;
2310 result->flags = 0;
bfb9dc7f
ZW
2311 result->val.str.text = (U_CHAR *) buf;
2312 result->val.str.len = strlen (buf);
041c3194
ZW
2313 return result;
2314}
c5a04734 2315
041c3194
ZW
2316/* Returns a temporary token from the temporary token store of PFILE. */
2317static cpp_token *
2318get_temp_token (pfile)
2319 cpp_reader *pfile;
2320{
2321 if (pfile->temp_used == pfile->temp_alloced)
2322 {
2323 if (pfile->temp_used == pfile->temp_cap)
2324 {
2325 pfile->temp_cap += pfile->temp_cap + 20;
2326 pfile->temp_tokens = (cpp_token **) xrealloc
2327 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2328 }
2329 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2330 (sizeof (cpp_token));
2331 }
c5a04734 2332
041c3194
ZW
2333 return pfile->temp_tokens[pfile->temp_used++];
2334}
2335
2336/* Release (not free) for re-use the temporary tokens of PFILE. */
2337static void
2338release_temp_tokens (pfile)
2339 cpp_reader *pfile;
2340{
2341 while (pfile->temp_used)
c5a04734 2342 {
041c3194 2343 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
c5a04734 2344
bfb9dc7f 2345 if (token_spellings[token->type].type == SPELL_STRING)
c5a04734 2346 {
bfb9dc7f
ZW
2347 free ((char *) token->val.str.text);
2348 token->val.str.text = 0;
c5a04734
ZW
2349 }
2350 }
041c3194 2351}
c5a04734 2352
041c3194
ZW
2353/* Free all of PFILE's dynamically-allocated temporary tokens. */
2354void
2355_cpp_free_temp_tokens (pfile)
2356 cpp_reader *pfile;
2357{
2358 if (pfile->temp_tokens)
c5a04734 2359 {
041c3194
ZW
2360 /* It is possible, though unlikely (looking for '(' of a funlike
2361 macro into EOF), that we haven't released the tokens yet. */
2362 release_temp_tokens (pfile);
2363 while (pfile->temp_alloced)
2364 free (pfile->temp_tokens[--pfile->temp_alloced]);
2365 free (pfile->temp_tokens);
c5a04734
ZW
2366 }
2367
041c3194
ZW
2368 if (pfile->date)
2369 {
bfb9dc7f 2370 free ((char *) pfile->date->val.str.text);
041c3194 2371 free (pfile->date);
bfb9dc7f 2372 free ((char *) pfile->time->val.str.text);
041c3194
ZW
2373 free (pfile->time);
2374 }
c5a04734
ZW
2375}
2376
041c3194
ZW
2377/* Copy TOKEN into a temporary token from PFILE's store. */
2378static cpp_token *
2379duplicate_token (pfile, token)
2380 cpp_reader *pfile;
2381 const cpp_token *token;
2382{
2383 cpp_token *result = get_temp_token (pfile);
c5a04734 2384
041c3194 2385 *result = *token;
bfb9dc7f 2386 if (token_spellings[token->type].type == SPELL_STRING)
041c3194 2387 {
bfb9dc7f
ZW
2388 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2389 memcpy (buff, token->val.str.text, token->val.str.len);
2390 result->val.str.text = buff;
041c3194
ZW
2391 }
2392 return result;
2393}
c5a04734 2394
041c3194
ZW
2395/* Determine whether two tokens can be pasted together, and if so,
2396 what the resulting token is. Returns CPP_EOF if the tokens cannot
2397 be pasted, or the appropriate type for the merged token if they
2398 can. */
2399static enum cpp_ttype
2400can_paste (pfile, token1, token2, digraph)
2401 cpp_reader * pfile;
2402 const cpp_token *token1, *token2;
2403 int* digraph;
c5a04734 2404{
041c3194
ZW
2405 enum cpp_ttype a = token1->type, b = token2->type;
2406 int cxx = CPP_OPTION (pfile, cplusplus);
c5a04734 2407
041c3194
ZW
2408 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2409 return a + (CPP_EQ_EQ - CPP_EQ);
c5a04734 2410
041c3194 2411 switch (a)
c5a04734 2412 {
041c3194
ZW
2413 case CPP_GREATER:
2414 if (b == a) return CPP_RSHIFT;
2415 if (b == CPP_QUERY && cxx) return CPP_MAX;
2416 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
2417 break;
2418 case CPP_LESS:
2419 if (b == a) return CPP_LSHIFT;
2420 if (b == CPP_QUERY && cxx) return CPP_MIN;
2421 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
9b55f29a
NB
2422 if (CPP_OPTION (pfile, digraphs))
2423 {
2424 if (b == CPP_COLON)
2425 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2426 if (b == CPP_MOD)
2427 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
2428 }
041c3194 2429 break;
c5a04734 2430
041c3194
ZW
2431 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2432 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2433 case CPP_OR: if (b == a) return CPP_OR_OR; break;
c5a04734 2434
041c3194
ZW
2435 case CPP_MINUS:
2436 if (b == a) return CPP_MINUS_MINUS;
2437 if (b == CPP_GREATER) return CPP_DEREF;
2438 break;
2439 case CPP_COLON:
2440 if (b == a && cxx) return CPP_SCOPE;
9b55f29a 2441 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
041c3194
ZW
2442 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2443 break;
2444
2445 case CPP_MOD:
9b55f29a
NB
2446 if (CPP_OPTION (pfile, digraphs))
2447 {
2448 if (b == CPP_GREATER)
2449 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2450 if (b == CPP_COLON)
2451 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2452 }
041c3194
ZW
2453 break;
2454 case CPP_DEREF:
2455 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2456 break;
2457 case CPP_DOT:
2458 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2459 if (b == CPP_NUMBER) return CPP_NUMBER;
2460 break;
2461
2462 case CPP_HASH:
2463 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2464 /* %:%: digraph */
2465 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2466 break;
2467
2468 case CPP_NAME:
2469 if (b == CPP_NAME) return CPP_NAME;
2470 if (b == CPP_NUMBER
bfb9dc7f 2471 && is_numstart(token2->val.str.text[0])) return CPP_NAME;
041c3194 2472 if (b == CPP_CHAR
bfb9dc7f 2473 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
041c3194 2474 if (b == CPP_STRING
bfb9dc7f 2475 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
041c3194
ZW
2476 break;
2477
2478 case CPP_NUMBER:
2479 if (b == CPP_NUMBER) return CPP_NUMBER;
2480 if (b == CPP_NAME) return CPP_NUMBER;
2481 if (b == CPP_DOT) return CPP_NUMBER;
2482 /* Numbers cannot have length zero, so this is safe. */
2483 if ((b == CPP_PLUS || b == CPP_MINUS)
bfb9dc7f 2484 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
041c3194
ZW
2485 return CPP_NUMBER;
2486 break;
2487
2488 default:
2489 break;
c5a04734
ZW
2490 }
2491
041c3194
ZW
2492 return CPP_EOF;
2493}
2494
2495/* Check if TOKEN is to be ##-pasted with the token after it. */
2496static const cpp_token *
2497maybe_paste_with_next (pfile, token)
2498 cpp_reader *pfile;
2499 const cpp_token *token;
2500{
2501 cpp_token *pasted;
2502 const cpp_token *second;
2503 cpp_context *context = CURRENT_CONTEXT (pfile);
2504
2505 /* Is this token on the LHS of ## ? */
041c3194 2506
417f3e3a
ZW
2507 while ((token->flags & PASTE_LEFT)
2508 || ((context->flags & CONTEXT_PASTEL)
2509 && context->posn == context->count))
c5a04734 2510 {
417f3e3a
ZW
2511 /* Suppress macro expansion for next token, but don't conflict
2512 with the other method of suppression. If it is an argument,
2513 macro expansion within the argument will still occur. */
2514 pfile->paste_level = pfile->cur_context;
2515 second = _cpp_get_token (pfile);
2516 pfile->paste_level = 0;
2517
2518 /* Ignore placemarker argument tokens (cannot be from an empty
2519 macro since macros are not expanded). */
2520 if (token->type == CPP_PLACEMARKER)
2521 pasted = duplicate_token (pfile, second);
2522 else if (second->type == CPP_PLACEMARKER)
041c3194 2523 {
417f3e3a
ZW
2524 cpp_context *mac_context = CURRENT_CONTEXT (pfile) - 1;
2525 /* GCC has special extended semantics for a ## b where b is
2526 a varargs parameter: a disappears if b consists of no
2527 tokens. This extension is deprecated. */
2528 if ((mac_context->u.list->flags & GNU_REST_ARGS)
2529 && (mac_context->u.list->tokens[mac_context->posn-1].val.aux + 1
2530 == (unsigned) mac_context->u.list->paramc))
2531 {
2532 cpp_warning (pfile, "deprecated GNU ## extension used");
2533 pasted = duplicate_token (pfile, second);
2534 }
2535 else
2536 pasted = duplicate_token (pfile, token);
041c3194
ZW
2537 }
2538 else
041c3194 2539 {
417f3e3a
ZW
2540 int digraph = 0;
2541 enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
c5a04734 2542
417f3e3a
ZW
2543 if (type == CPP_EOF)
2544 {
2545 if (CPP_OPTION (pfile, warn_paste))
2546 cpp_warning (pfile,
2547 "pasting would not give a valid preprocessing token");
2548 _cpp_push_token (pfile, second);
2549 return token;
2550 }
c5a04734 2551
417f3e3a
ZW
2552 if (type == CPP_NAME || type == CPP_NUMBER)
2553 {
2554 /* Join spellings. */
2555 U_CHAR *buf, *end;
2556
2557 pasted = get_temp_token (pfile);
2558 buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2559 end = spell_token (pfile, token, buf);
2560 end = spell_token (pfile, second, end);
2561 *end = '\0';
041c3194 2562
417f3e3a
ZW
2563 if (type == CPP_NAME)
2564 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2565 else
2566 {
2567 pasted->val.str.text = uxstrdup (buf);
2568 pasted->val.str.len = end - buf;
2569 }
2570 }
2571 else if (type == CPP_WCHAR || type == CPP_WSTRING)
2572 pasted = duplicate_token (pfile, second);
bfb9dc7f
ZW
2573 else
2574 {
417f3e3a
ZW
2575 pasted = get_temp_token (pfile);
2576 pasted->val.integer = 0;
bfb9dc7f 2577 }
417f3e3a
ZW
2578
2579 pasted->type = type;
2580 pasted->flags = digraph ? DIGRAPH : 0;
041c3194
ZW
2581 }
2582
417f3e3a
ZW
2583 /* The pasted token gets the whitespace flags and position of the
2584 first token, the PASTE_LEFT flag of the second token, plus the
2585 PASTED flag to indicate it is the result of a paste. However, we
2586 want to preserve the DIGRAPH flag. */
2587 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2588 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2589 | (second->flags & PASTE_LEFT) | PASTED);
2590 pasted->col = token->col;
2591 pasted->line = token->line;
2592
2593 /* See if there is another token to be pasted onto the one we just
2594 constructed. */
2595 token = pasted;
2596 context = CURRENT_CONTEXT (pfile);
2597 /* and loop */
041c3194 2598 }
417f3e3a 2599 return token;
041c3194
ZW
2600}
2601
2602/* Convert a token sequence to a single string token according to the
2603 rules of the ISO C #-operator. */
2604#define INIT_SIZE 200
2605static cpp_token *
2606stringify_arg (pfile, token)
c5a04734 2607 cpp_reader *pfile;
041c3194 2608 const cpp_token *token;
c5a04734 2609{
041c3194
ZW
2610 cpp_token *result;
2611 unsigned char *main_buf;
2612 unsigned int prev_value, backslash_count = 0;
2613 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
c5a04734 2614
417f3e3a 2615 push_arg_context (pfile, token);
041c3194
ZW
2616 prev_value = prevent_macro_expansion (pfile);
2617 main_buf = (unsigned char *) xmalloc (buf_cap);
c5a04734 2618
041c3194
ZW
2619 result = get_temp_token (pfile);
2620 ASSIGN_FLAGS_AND_POS (result, token);
2621
417f3e3a 2622 for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
c5a04734 2623 {
041c3194
ZW
2624 int escape;
2625 unsigned char *buf;
2626 unsigned int len = TOKEN_LEN (token);
c5a04734 2627
041c3194
ZW
2628 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2629 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2630 if (escape)
2631 len *= 4 + 1;
2632
2633 if (buf_used + len > buf_cap)
c5a04734 2634 {
041c3194
ZW
2635 buf_cap = buf_used + len + INIT_SIZE;
2636 main_buf = xrealloc (main_buf, buf_cap);
2637 }
c5a04734 2638
041c3194
ZW
2639 if (whitespace && (token->flags & PREV_WHITE))
2640 main_buf[buf_used++] = ' ';
c5a04734 2641
041c3194
ZW
2642 if (escape)
2643 buf = (unsigned char *) xmalloc (len);
2644 else
2645 buf = main_buf + buf_used;
2646
2647 len = spell_token (pfile, token, buf) - buf;
2648 if (escape)
2649 {
2650 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2651 free (buf);
2652 }
2653 else
2654 buf_used += len;
c5a04734 2655
041c3194
ZW
2656 whitespace = 1;
2657 if (token->type == CPP_BACKSLASH)
2658 backslash_count++;
2659 else
2660 backslash_count = 0;
2661 }
c5a04734 2662
041c3194
ZW
2663 /* Ignore the final \ of invalid string literals. */
2664 if (backslash_count & 1)
2665 {
2666 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2667 buf_used--;
2668 }
c5a04734 2669
041c3194 2670 result->type = CPP_STRING;
bfb9dc7f
ZW
2671 result->val.str.text = main_buf;
2672 result->val.str.len = buf_used;
041c3194
ZW
2673 restore_macro_expansion (pfile, prev_value);
2674 return result;
2675}
c5a04734 2676
041c3194
ZW
2677/* Allocate more room on the context stack of PFILE. */
2678static void
2679expand_context_stack (pfile)
2680 cpp_reader *pfile;
2681{
2682 pfile->context_cap += pfile->context_cap + 20;
2683 pfile->contexts = (cpp_context *)
2684 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2685}
c5a04734 2686
041c3194
ZW
2687/* Push the context of macro NODE onto the context stack. TOKEN is
2688 the CPP_NAME token invoking the macro. */
417f3e3a
ZW
2689static int
2690push_macro_context (pfile, token)
041c3194 2691 cpp_reader *pfile;
041c3194
ZW
2692 const cpp_token *token;
2693{
2694 unsigned char orig_flags;
2695 macro_args *args;
2696 cpp_context *context;
417f3e3a 2697 cpp_hashnode *node = token->val.node;
c5a04734 2698
041c3194
ZW
2699 /* Token's flags may change when parsing args containing a nested
2700 invocation of this macro. */
2701 orig_flags = token->flags & (PREV_WHITE | BOL);
2702 args = 0;
2703 if (node->value.expansion->paramc >= 0)
c5a04734 2704 {
041c3194
ZW
2705 unsigned int error, prev_nme;
2706
2707 /* Allocate room for the argument contexts, and parse them. */
2708 args = (macro_args *) xmalloc (sizeof (macro_args));
2709 args->ends = (unsigned int *)
2710 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2711 args->tokens = 0;
2712 args->capacity = 0;
2713 args->used = 0;
2714 args->level = pfile->cur_context;
2715
2716 prev_nme = prevent_macro_expansion (pfile);
2717 pfile->args = args;
2718 error = parse_args (pfile, node, args);
2719 pfile->args = 0;
2720 restore_macro_expansion (pfile, prev_nme);
2721 if (error)
2722 {
2723 free_macro_args (args);
417f3e3a 2724 return 1;
041c3194 2725 }
c5a04734 2726 }
c5a04734 2727
041c3194
ZW
2728 /* Now push its context. */
2729 pfile->cur_context++;
2730 if (pfile->cur_context == pfile->context_cap)
2731 expand_context_stack (pfile);
2732
2733 context = CURRENT_CONTEXT (pfile);
2734 context->u.list = node->value.expansion;
2735 context->args = args;
2736 context->posn = 0;
2737 context->count = context->u.list->tokens_used;
2738 context->level = pfile->cur_context;
2739 context->flags = 0;
2740 context->pushed_token = 0;
2741
2742 /* Set the flags of the first token. We know there must
2743 be one, empty macros are a single placemarker token. */
2744 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2745
417f3e3a 2746 return 0;
c5a04734
ZW
2747}
2748
041c3194
ZW
2749/* Push an argument to the current macro onto the context stack.
2750 TOKEN is the MACRO_ARG token representing the argument expansion. */
417f3e3a 2751static void
041c3194
ZW
2752push_arg_context (pfile, token)
2753 cpp_reader *pfile;
2754 const cpp_token *token;
c5a04734 2755{
041c3194
ZW
2756 cpp_context *context;
2757 macro_args *args;
2758
2759 pfile->cur_context++;
2760 if (pfile->cur_context == pfile->context_cap)
2761 expand_context_stack (pfile);
2762
2763 context = CURRENT_CONTEXT (pfile);
2764 args = context[-1].args;
2765
2766 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2767 context->u.arg = args->tokens + context->count;
2768 context->count = args->ends[token->val.aux] - context->count;
2769 context->args = 0;
2770 context->posn = 0;
2771 context->level = args->level;
2772 context->flags = CONTEXT_ARG | CONTEXT_RAW;
2773 context->pushed_token = 0;
2774
2775 /* Set the flags of the first token. There is one. */
2776 {
2777 const cpp_token *first = context->u.arg[0];
2778 if (!first)
2779 first = context->u.arg[1];
c5a04734 2780
041c3194
ZW
2781 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2782 token->flags & (PREV_WHITE | BOL));
2783 }
c5a04734 2784
041c3194
ZW
2785 if (token->flags & PASTE_LEFT)
2786 context->flags |= CONTEXT_PASTEL;
2787 if (pfile->paste_level)
2788 context->flags |= CONTEXT_PASTER;
c5a04734
ZW
2789}
2790
041c3194
ZW
2791/* "Unget" a token. It is effectively inserted in the token queue and
2792 will be returned by the next call to get_raw_token. */
c5a04734 2793void
041c3194 2794_cpp_push_token (pfile, token)
c5a04734 2795 cpp_reader *pfile;
041c3194 2796 const cpp_token *token;
c5a04734 2797{
041c3194
ZW
2798 cpp_context *context = CURRENT_CONTEXT (pfile);
2799 if (context->pushed_token)
2800 cpp_ice (pfile, "two tokens pushed in a row");
2801 if (token->type != CPP_EOF)
2802 context->pushed_token = token;
2803 /* Don't push back a directive's CPP_EOF, step back instead. */
2804 else if (pfile->cur_context == 0)
2805 pfile->contexts[0].posn--;
2806}
c5a04734 2807
041c3194
ZW
2808/* Handle a preprocessing directive. TOKEN is the CPP_HASH token
2809 introducing the directive. */
2810static void
2811process_directive (pfile, token)
2812 cpp_reader *pfile;
2813 const cpp_token *token;
2814{
2815 const struct directive *d = pfile->token_list.directive;
2816 int prev_nme = 0;
2817
2818 /* Skip over the directive name. */
2819 if (token[1].type == CPP_NAME)
2820 _cpp_get_raw_token (pfile);
2821 else if (token[1].type != CPP_NUMBER)
2822 cpp_ice (pfile, "directive begins with %s?!",
2823 token_names[token[1].type]);
2824
2825 /* Flush pending tokens at this point, in case the directive produces
2826 output. XXX Directive output won't be visible to a direct caller of
2827 cpp_get_token. */
2828 if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
2829 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
2830
2831 if (! (d->flags & EXPAND))
2832 prev_nme = prevent_macro_expansion (pfile);
2833 (void) (*d->handler) (pfile);
2834 if (! (d->flags & EXPAND))
2835 restore_macro_expansion (pfile, prev_nme);
2836 _cpp_skip_rest_of_line (pfile);
2837}
c5a04734 2838
041c3194
ZW
2839/* The external interface to return the next token. All macro
2840 expansion and directive processing is handled internally, the
2841 caller only ever sees the output after preprocessing. */
2842const cpp_token *
2843cpp_get_token (pfile)
2844 cpp_reader *pfile;
2845{
2846 const cpp_token *token;
417f3e3a 2847 /* Loop till we hit a non-directive, non-placemarker token. */
041c3194
ZW
2848 for (;;)
2849 {
417f3e3a
ZW
2850 token = _cpp_get_token (pfile);
2851
2852 if (token->type == CPP_PLACEMARKER)
2853 continue;
2854
2855 if (token->type == CPP_HASH && token->flags & BOL
041c3194 2856 && pfile->token_list.directive)
c5a04734 2857 {
041c3194
ZW
2858 process_directive (pfile, token);
2859 continue;
c5a04734
ZW
2860 }
2861
417f3e3a
ZW
2862 return token;
2863 }
2864}
2865
2866/* The internal interface to return the next token. There are two
2867 differences between the internal and external interfaces: the
2868 internal interface may return a PLACEMARKER token, and it does not
2869 process directives. */
2870const cpp_token *
2871_cpp_get_token (pfile)
2872 cpp_reader *pfile;
2873{
2874 const cpp_token *token;
2875 cpp_hashnode *node;
2876
2877 /* Loop until we hit a non-macro token. */
2878 for (;;)
2879 {
2880 token = get_raw_token (pfile);
2881
041c3194
ZW
2882 /* Short circuit EOF. */
2883 if (token->type == CPP_EOF)
2884 return token;
417f3e3a
ZW
2885
2886 /* If we are skipping... */
2887 if (pfile->skipping)
c5a04734 2888 {
417f3e3a
ZW
2889 /* we still have to process directives, */
2890 if (pfile->token_list.directive)
2891 return token;
2892
2893 /* but everything else is ignored. */
041c3194
ZW
2894 _cpp_skip_rest_of_line (pfile);
2895 continue;
2896 }
c5a04734 2897
417f3e3a
ZW
2898 /* If there's a potential control macro and we get here, then that
2899 #ifndef didn't cover the entire file and its argument shouldn't
2900 be taken as a control macro. */
2901 pfile->potential_control_macro = 0;
c5a04734 2902
417f3e3a
ZW
2903 /* See if there's a token to paste with this one. */
2904 if (!pfile->paste_level)
2905 token = maybe_paste_with_next (pfile, token);
c5a04734 2906
417f3e3a
ZW
2907 /* If it isn't a macro, return it now. */
2908 if (token->type != CPP_NAME
2909 || token->val.node->type == T_VOID)
2910 return token;
c5a04734 2911
417f3e3a
ZW
2912 /* Is macro expansion disabled in general? */
2913 if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
2914 return token;
041c3194 2915
417f3e3a
ZW
2916 node = token->val.node;
2917 if (node->type != T_MACRO)
2918 return special_symbol (pfile, node, token);
c5a04734 2919
041c3194
ZW
2920 if (is_macro_disabled (pfile, node->value.expansion, token))
2921 return token;
c5a04734 2922
417f3e3a
ZW
2923 if (pfile->cur_context > CPP_STACK_MAX)
2924 {
2925 cpp_error (pfile, "macros nested too deep invoking '%s'", node->name);
2926 return token;
2927 }
2928
2929 if (push_macro_context (pfile, token))
2930 return token;
2931 /* else loop */
041c3194 2932 }
041c3194 2933}
c5a04734 2934
041c3194
ZW
2935/* Returns the next raw token, i.e. without performing macro
2936 expansion. Argument contexts are automatically entered. */
2937static const cpp_token *
2938get_raw_token (pfile)
2939 cpp_reader *pfile;
2940{
2941 const cpp_token *result;
417f3e3a 2942 cpp_context *context;
c5a04734 2943
417f3e3a 2944 for (;;)
041c3194 2945 {
417f3e3a
ZW
2946 context = CURRENT_CONTEXT (pfile);
2947 if (context->pushed_token)
041c3194 2948 {
417f3e3a
ZW
2949 result = context->pushed_token;
2950 context->pushed_token = 0;
2951 }
2952 else if (context->posn == context->count)
2953 {
2954 if (pop_context (pfile))
2955 return &eof_token;
2956 continue;
2957 }
2958 else
2959 {
2960 if (IS_ARG_CONTEXT (context))
c5a04734 2961 {
041c3194 2962 result = context->u.arg[context->posn++];
417f3e3a
ZW
2963 if (result == 0)
2964 {
2965 context->flags ^= CONTEXT_RAW;
2966 result = context->u.arg[context->posn++];
2967 }
2968 return result; /* Cannot be a CPP_MACRO_ARG */
c5a04734 2969 }
417f3e3a 2970 result = &context->u.list->tokens[context->posn++];
041c3194 2971 }
c5a04734 2972
417f3e3a
ZW
2973 if (result->type != CPP_MACRO_ARG)
2974 return result;
2975
2976 if (result->flags & STRINGIFY_ARG)
2977 return stringify_arg (pfile, result);
2978
2979 push_arg_context (pfile, result);
2980 }
041c3194 2981}
c5a04734 2982
041c3194
ZW
2983/* Internal interface to get the token without macro expanding. */
2984const cpp_token *
2985_cpp_get_raw_token (pfile)
2986 cpp_reader *pfile;
2987{
2988 int prev_nme = prevent_macro_expansion (pfile);
417f3e3a 2989 const cpp_token *result = _cpp_get_token (pfile);
041c3194
ZW
2990 restore_macro_expansion (pfile, prev_nme);
2991 return result;
2992}
c5a04734 2993
041c3194
ZW
2994/* A thin wrapper to lex_line. CLEAR is non-zero if the current token
2995 list should be overwritten, or zero if we need to append
2996 (typically, if we are within the arguments to a macro, or looking
2997 for the '(' to start a function-like macro invocation). */
2998static int
2999lex_next (pfile, clear)
3000 cpp_reader *pfile;
3001 int clear;
3002{
3003 cpp_toklist *list = &pfile->token_list;
3004 const cpp_token *old_list = list->tokens;
3005 unsigned int old_used = list->tokens_used;
c5a04734 3006
041c3194 3007 if (clear)
c5a04734 3008 {
041c3194
ZW
3009 /* Release all temporary tokens. */
3010 _cpp_clear_toklist (list);
3011 pfile->contexts[0].posn = 0;
3012 if (pfile->temp_used)
3013 release_temp_tokens (pfile);
c5a04734 3014 }
041c3194
ZW
3015
3016 lex_line (pfile, list);
3017 pfile->contexts[0].count = list->tokens_used;
c5a04734 3018
041c3194 3019 if (!clear && pfile->args)
c5a04734 3020 {
041c3194
ZW
3021 /* Fix up argument token pointers. */
3022 if (old_list != list->tokens)
3023 {
3024 unsigned int i;
3025
3026 for (i = 0; i < pfile->args->used; i++)
3027 {
3028 const cpp_token *token = pfile->args->tokens[i];
3029 if (token >= old_list && token < old_list + old_used)
3030 pfile->args->tokens[i] = (const cpp_token *)
3031 ((char *) token + ((char *) list->tokens - (char *) old_list));
3032 }
3033 }
3034
3035 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3036 tokens within the list of arguments that would otherwise act as
3037 preprocessing directives, the behavior is undefined.
3038
3039 This implementation will report a hard error and treat the
3040 'sequence of preprocessing tokens' as part of the macro argument,
3041 not a directive.
3042
3043 Note if pfile->args == 0, we're OK since we're only inside a
3044 macro argument after a '('. */
3045 if (list->directive)
3046 {
3047 cpp_error_with_line (pfile, list->tokens[old_used].line,
3048 list->tokens[old_used].col,
3049 "#%s may not be used inside a macro argument",
3050 list->directive->name);
91fcd158 3051 return 1;
041c3194 3052 }
c5a04734
ZW
3053 }
3054
041c3194 3055 return 0;
c5a04734
ZW
3056}
3057
417f3e3a
ZW
3058/* Pops a context off the context stack. If we're at the bottom, lexes
3059 the next logical line. Returns EOF if we're at the end of the
041c3194
ZW
3060 argument list to the # operator, or if it is illegal to "overflow"
3061 into the rest of the file (e.g. 6.10.3.1.1). */
3062static int
417f3e3a 3063pop_context (pfile)
041c3194
ZW
3064 cpp_reader *pfile;
3065{
3066 cpp_context *context;
3fef5b2b 3067
041c3194 3068 if (pfile->cur_context == 0)
417f3e3a
ZW
3069 {
3070 /* If we are currently processing a directive, do not advance. 6.10
3071 paragraph 2: A new-line character ends the directive even if it
3072 occurs within what would otherwise be an invocation of a
3073 function-like macro. */
3074 if (pfile->token_list.directive)
3075 return 1;
3076
3077 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3078 }
041c3194
ZW
3079
3080 /* Argument contexts, when parsing args or handling # operator
3081 return CPP_EOF at the end. */
3082 context = CURRENT_CONTEXT (pfile);
3083 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3084 return 1;
3085
3086 /* Free resources when leaving macro contexts. */
3087 if (context->args)
3088 free_macro_args (context->args);
3089
3090 if (pfile->cur_context == pfile->no_expand_level)
3091 pfile->no_expand_level--;
3092 pfile->cur_context--;
3093
3094 return 0;
3095}
3096
041c3194
ZW
3097/* Turn off macro expansion at the current context level. */
3098static unsigned int
3099prevent_macro_expansion (pfile)
3100 cpp_reader *pfile;
3101{
3102 unsigned int prev_value = pfile->no_expand_level;
3103 pfile->no_expand_level = pfile->cur_context;
3104 return prev_value;
3105}
3106
3107/* Restore macro expansion to its previous state. */
3108static void
3109restore_macro_expansion (pfile, prev_value)
3110 cpp_reader *pfile;
3111 unsigned int prev_value;
3112{
3113 pfile->no_expand_level = prev_value;
3114}
3115
3116/* Used by cpperror.c to obtain the correct line and column to report
3117 in a diagnostic. */
3118unsigned int
3119_cpp_get_line (pfile, pcol)
3120 cpp_reader *pfile;
3121 unsigned int *pcol;
3122{
3123 unsigned int index;
3124 const cpp_token *cur_token;
3125
3126 if (pfile->in_lex_line)
3127 index = pfile->token_list.tokens_used;
3128 else
3129 index = pfile->contexts[0].posn;
3130
3131 cur_token = &pfile->token_list.tokens[index - 1];
3132 if (pcol)
3133 *pcol = cur_token->col;
3134 return cur_token->line;
3135}
3136
/* Expand a string literal into two arguments: its text as a
   const U_CHAR pointer, and its length without the trailing NUL.  */
#define DSC(str) (const U_CHAR *) (str), sizeof (str) - 1

/* Three-letter month abbreviations, indexed from zero (January).  */
static const char * const monthnames[] =
{
  "Jan", "Feb", "Mar",
  "Apr", "May", "Jun",
  "Jul", "Aug", "Sep",
  "Oct", "Nov", "Dec",
};
3143
3144/* Handle builtin macros like __FILE__. */
3145static const cpp_token *
3146special_symbol (pfile, node, token)
3147 cpp_reader *pfile;
3148 cpp_hashnode *node;
d1d9a6bd 3149 const cpp_token *token;
3fef5b2b 3150{
041c3194
ZW
3151 cpp_token *result;
3152 cpp_buffer *ip;
3fef5b2b 3153
041c3194 3154 switch (node->type)
3fef5b2b 3155 {
041c3194
ZW
3156 case T_FILE:
3157 case T_BASE_FILE:
3fef5b2b 3158 {
041c3194 3159 const char *file;
3fef5b2b 3160
041c3194
ZW
3161 ip = CPP_BUFFER (pfile);
3162 if (ip == 0)
3163 file = "";
3fef5b2b 3164 else
041c3194
ZW
3165 {
3166 if (node->type == T_BASE_FILE)
3167 while (CPP_PREV_BUFFER (ip) != NULL)
3168 ip = CPP_PREV_BUFFER (ip);
3169
3170 file = ip->nominal_fname;
3171 }
3172 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3173 strlen (file));
3174 }
3175 break;
3fef5b2b 3176
041c3194 3177 case T_INCLUDE_LEVEL:
f9a0e96c
ZW
3178 /* pfile->include_depth counts the primary source as level 1,
3179 but historically __INCLUDE_DEPTH__ has called the primary
3180 source level 0. */
3181 result = alloc_number_token (pfile, pfile->include_depth - 1);
3fef5b2b
NB
3182 break;
3183
041c3194
ZW
3184 case T_SPECLINE:
3185 /* If __LINE__ is embedded in a macro, it must expand to the
3186 line of the macro's invocation, not its definition.
3187 Otherwise things like assert() will not work properly. */
3188 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
5d7ee2fa
NB
3189 break;
3190
041c3194 3191 case T_STDC:
3fef5b2b 3192 {
041c3194 3193 int stdc = 1;
3fef5b2b 3194
041c3194
ZW
3195#ifdef STDC_0_IN_SYSTEM_HEADERS
3196 if (CPP_IN_SYSTEM_HEADER (pfile)
bfb9dc7f 3197 && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
041c3194
ZW
3198 stdc = 0;
3199#endif
3200 result = alloc_number_token (pfile, stdc);
3fef5b2b
NB
3201 }
3202 break;
3203
041c3194
ZW
3204 case T_DATE:
3205 case T_TIME:
3206 if (pfile->date == 0)
3207 {
3208 /* Allocate __DATE__ and __TIME__ from permanent storage,
3209 and save them in pfile so we don't have to do this again.
3210 We don't generate these strings at init time because
3211 time() and localtime() are very slow on some systems. */
3212 time_t tt = time (NULL);
3213 struct tm *tb = localtime (&tt);
3214
3215 pfile->date = make_string_token
3216 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3217 pfile->time = make_string_token
3218 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3219
bfb9dc7f 3220 sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
041c3194 3221 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
bfb9dc7f 3222 sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
041c3194
ZW
3223 tb->tm_hour, tb->tm_min, tb->tm_sec);
3224 }
3225 result = node->type == T_DATE ? pfile->date: pfile->time;
3fef5b2b
NB
3226 break;
3227
041c3194 3228 case T_POISON:
bfb9dc7f 3229 cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
041c3194
ZW
3230 return token;
3231
3232 default:
3233 cpp_ice (pfile, "invalid special hash type");
3234 return token;
3fef5b2b
NB
3235 }
3236
041c3194
ZW
3237 ASSIGN_FLAGS_AND_POS (result, token);
3238 return result;
3239}
3240#undef DSC
3241
3242/* Dump the original user's spelling of argument index ARG_NO to the
3243 macro whose expansion is LIST. */
3244static void
3245dump_param_spelling (pfile, list, arg_no)
3246 cpp_reader *pfile;
3247 const cpp_toklist *list;
3248 unsigned int arg_no;
3249{
3250 const U_CHAR *param = list->namebuf;
3251
3252 while (arg_no--)
3253 param += ustrlen (param) + 1;
3254 CPP_PUTS (pfile, param, ustrlen (param));
3fef5b2b
NB
3255}
3256
041c3194 3257/* Dump a token list to the output. */
c5a04734 3258void
041c3194
ZW
3259_cpp_dump_list (pfile, list, token, flush)
3260 cpp_reader *pfile;
3261 const cpp_toklist *list;
3262 const cpp_token *token;
3263 int flush;
c5a04734 3264{
041c3194
ZW
3265 const cpp_token *limit = list->tokens + list->tokens_used;
3266 const cpp_token *prev = 0;
c5a04734 3267
041c3194
ZW
3268 /* Avoid the CPP_EOF. */
3269 if (list->directive)
3270 limit--;
c5a04734 3271
041c3194 3272 while (token < limit)
c5a04734 3273 {
041c3194
ZW
3274 if (token->type == CPP_MACRO_ARG)
3275 {
3276 if (token->flags & PREV_WHITE)
3277 CPP_PUTC (pfile, ' ');
3278 if (token->flags & STRINGIFY_ARG)
3279 CPP_PUTC (pfile, '#');
3280 dump_param_spelling (pfile, list, token->val.aux);
3281 }
c5a04734 3282 else
041c3194
ZW
3283 output_token (pfile, token, prev);
3284 if (token->flags & PASTE_LEFT)
3285 CPP_PUTS (pfile, " ##", 3);
3286 prev = token;
3287 token++;
c5a04734 3288 }
041c3194
ZW
3289
3290 if (flush && pfile->printer)
3291 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
c5a04734
ZW
3292}
3293
041c3194
ZW
3294/* Allocate pfile->input_buffer, and initialize trigraph_map[]
3295 if it hasn't happened already. */
3296
3297void
3298_cpp_init_input_buffer (pfile)
3299 cpp_reader *pfile;
3300{
417f3e3a
ZW
3301 cpp_context *base;
3302
041c3194 3303 init_trigraph_map ();
417f3e3a
ZW
3304 _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3305 pfile->no_expand_level = UINT_MAX;
041c3194 3306 pfile->context_cap = 20;
041c3194 3307 pfile->cur_context = 0;
041c3194 3308
417f3e3a
ZW
3309 pfile->contexts = (cpp_context *)
3310 xmalloc (pfile->context_cap * sizeof (cpp_context));
041c3194 3311
417f3e3a
ZW
3312 /* Clear the base context. */
3313 base = &pfile->contexts[0];
3314 base->u.list = &pfile->token_list;
3315 base->posn = 0;
3316 base->count = 0;
3317 base->args = 0;
3318 base->level = 0;
3319 base->flags = 0;
3320 base->pushed_token = 0;
041c3194
ZW
3321}
3322
3323/* Moves to the end of the directive line, popping contexts as
3324 necessary. */
3325void
3326_cpp_skip_rest_of_line (pfile)
3327 cpp_reader *pfile;
3328{
417f3e3a
ZW
3329 /* Discard all stacked contexts. */
3330 int i;
3331 for (i = pfile->cur_context; i > 0; i--)
3332 if (pfile->contexts[i].args)
3333 free_macro_args (pfile->contexts[i].args);
3334
3335 if (pfile->no_expand_level <= pfile->cur_context)
3336 pfile->no_expand_level = 0;
3337 pfile->cur_context = 0;
041c3194 3338
417f3e3a
ZW
3339 /* Clear the base context, and clear the directive pointer so that
3340 get_raw_token will advance to the next line. */
3341 pfile->contexts[0].count = 0;
3342 pfile->contexts[0].posn = 0;
041c3194 3343 pfile->token_list.directive = 0;
c5a04734
ZW
3344}
3345
041c3194
ZW
3346/* Directive handler wrapper used by the command line option
3347 processor. */
3348void
3349_cpp_run_directive (pfile, dir, buf, count)
3350 cpp_reader *pfile;
3351 const struct directive *dir;
3352 const char *buf;
3353 size_t count;
3354{
3355 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3356 {
3357 unsigned int prev_lvl = 0;
417f3e3a
ZW
3358
3359 /* Scan the line now, else prevent_macro_expansion won't work. */
3360 lex_next (pfile, 1);
041c3194
ZW
3361 if (! (dir->flags & EXPAND))
3362 prev_lvl = prevent_macro_expansion (pfile);
3363
3364 (void) (*dir->handler) (pfile);
3365
3366 if (! (dir->flags & EXPAND))
3367 restore_macro_expansion (pfile, prev_lvl);
3368
3369 _cpp_skip_rest_of_line (pfile);
3370 cpp_pop_buffer (pfile);
3371 }
3372}
This page took 0.51803 seconds and 5 git commands to generate.