]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
* gcc.dg/20000715-1.c: New test.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
041c3194
ZW
23/*
24
25Cleanups to do:-
26
041c3194
ZW
27o -dM and with _cpp_dump_list: too many \n output.
28o Put a printer object in cpp_reader?
29o Check line numbers assigned to all errors.
30o Replace strncmp with memcmp almost everywhere.
31o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
041c3194
ZW
32o Convert do_ functions to return void. Kaveh thinks it's OK; and said he'll
33 give it a run when we've got some code.
041c3194
ZW
34o Distinguish integers, floats, and 'other' pp-numbers.
35o Store ints and char constants as binary values.
36o New command-line assertion syntax.
041c3194
ZW
37o Work towards functions in cpperror.c taking a message level parameter.
38 If we do this, merge the common code of do_warning and do_error.
39o Comment all functions, and describe macro expansion algorithm.
40o Move as much out of header files as possible.
41o Remove single quote pairs `', and some '', from diagnostics.
42o Correct pastability test for CPP_NAME and CPP_NUMBER.
43
44*/
45
45b966db
ZW
46#include "config.h"
47#include "system.h"
48#include "intl.h"
49#include "cpplib.h"
50#include "cpphash.h"
041c3194 51#include "symcat.h"
45b966db 52
f9a0e96c
ZW
53static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
54static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
55
56/* Flags for cpp_context. */
57#define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
58#define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
59#define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
60#define CONTEXT_ARG (1 << 3) /* If an argument context. */
61
62typedef struct cpp_context cpp_context;
63struct cpp_context
64{
65 union
66 {
67 const cpp_toklist *list; /* Used for macro contexts only. */
68 const cpp_token **arg; /* Used for arg contexts only. */
69 } u;
70
71 /* Pushed token to be returned by next call to get_raw_token. */
72 const cpp_token *pushed_token;
73
74 struct macro_args *args; /* 0 for arguments and object-like macros. */
75 unsigned short posn; /* Current posn, index into u. */
76 unsigned short count; /* No. of tokens in u. */
77 unsigned short level;
78 unsigned char flags;
79};
80
81typedef struct macro_args macro_args;
82struct macro_args
83{
84 unsigned int *ends;
85 const cpp_token **tokens;
86 unsigned int capacity;
87 unsigned int used;
88 unsigned short level;
89};
90
91static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
92static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
93 macro_args *, unsigned int *));
94static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
95static void save_token PARAMS ((macro_args *, const cpp_token *));
96static int pop_context PARAMS ((cpp_reader *));
97static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
98static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
99static void free_macro_args PARAMS ((macro_args *));
100
041c3194
ZW
101#define auto_expand_name_space(list) \
102 _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
f2d5f0cc
ZW
103static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
104 size_t, FILE *));
041c3194 105static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
1368ee70 106 unsigned int));
041c3194 107static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
9e62c811 108 unsigned int));
f2d5f0cc 109
041c3194
ZW
110static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
111static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
b8f41010
NB
112 unsigned char *));
113static const unsigned char *backslash_start PARAMS ((cpp_reader *,
114 const unsigned char *));
041c3194
ZW
115static int skip_block_comment PARAMS ((cpp_reader *));
116static int skip_line_comment PARAMS ((cpp_reader *));
52fadca8 117static void adjust_column PARAMS ((cpp_reader *, const U_CHAR *));
b8f41010 118static void skip_whitespace PARAMS ((cpp_reader *, int));
bfb9dc7f
ZW
119static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
120 const U_CHAR *, const U_CHAR *));
121static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
041c3194
ZW
122static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
123 unsigned int));
b8f41010 124static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
f624ffa7
NB
125static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
126 const unsigned char *,
ad265aa4 127 unsigned int, unsigned int));
041c3194
ZW
128static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
129static int lex_next PARAMS ((cpp_reader *, int));
130static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
131 const cpp_token *));
b8f41010 132
041c3194
ZW
133static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
134static void expand_context_stack PARAMS ((cpp_reader *));
d1d9a6bd 135static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
041c3194
ZW
136 unsigned char *));
137static void output_token PARAMS ((cpp_reader *, const cpp_token *,
138 const cpp_token *));
b8f41010
NB
139typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
140 cpp_token *));
041c3194
ZW
141static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
142 unsigned int));
143static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
144static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
145 const cpp_token *));
146static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
147static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
148 const cpp_token *));
149static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
150 const cpp_token *, int *));
151static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
152static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
153static cpp_token *get_temp_token PARAMS ((cpp_reader *));
154static void release_temp_tokens PARAMS ((cpp_reader *));
155static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
156static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
b8f41010 157
bfb9dc7f
ZW
/* Make TOKEN an empty string token whose text starts at the first unused
   byte of LIST's name buffer.  */
#define INIT_TOKEN_STR(list, token) \
  do {(token)->val.str.len = 0; \
      (token)->val.str.text = (list)->namebuf + (list)->name_used; \
  } while (0)

/* Non-zero if C is '+' or '-' and PREVC is 'e' or 'E', or PREVC is 'p'
   or 'P' and the c89 option is off.  Requires a variable named `pfile'
   in the scope of the expansion.  */
#define VALID_SIGN(c, prevc) \
  (((c) == '+' || (c) == '-') && \
   ((prevc) == 'e' || (prevc) == 'E' \
    || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))

/* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
   character, if any, is in buffer.

   C is the newline character already consumed.  '\r' + '\n' - (c) is
   the other member of the CR/LF pair, so a following partner character
   is swallowed as part of the same line ending.  C is parenthesized so
   that an expression argument is subtracted as a whole.  Requires a
   variable named `pfile' in the scope of the expansion; updates its
   buffer's line number and line base and resets col_adjust.  */
#define handle_newline(cur, limit, c) \
  do { \
    if ((cur) < (limit) && *(cur) == '\r' + '\n' - (c)) \
      (cur)++; \
    pfile->buffer->lineno++; \
    pfile->buffer->line_base = (cur); \
    pfile->col_adjust = 0; \
  } while (0)

/* The macros below operate on a variable named `cur_token' that must be
   in the scope of the expansion.  */

/* Non-zero if the current token does not have PREV_WHITE set.  */
#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
/* Type of the token immediately before the current one.  */
#define PREV_TOKEN_TYPE (cur_token[-1].type)

/* Set the type of the current token and advance / set the type of the
   previous token / step back one token and set its type.  The
   expansions are parenthesized so each behaves as a single expression
   wherever it is used.  */
#define PUSH_TOKEN(ttype) (cur_token++->type = (ttype))
#define REVISE_TOKEN(ttype) (cur_token[-1].type = (ttype))
#define BACKUP_TOKEN(ttype) ((--cur_token)->type = (ttype))
/* As BACKUP_TOKEN, and also mark the token with the DIGRAPH flag.  */
#define BACKUP_DIGRAPH(ttype) do { \
  BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)

/* An upper bound on the number of bytes needed to spell a token,
   including preceding whitespace.  */
#define TOKEN_LEN(token) (5 + (TOKEN_SPELL(token) == SPELL_STRING \
                               ? (token)->val.str.len \
                               : (TOKEN_SPELL(token) == SPELL_IDENT \
                                  ? (token)->val.node->length \
                                  : 0)))

/* Non-zero if context C has the CONTEXT_ARG flag set.  */
#define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
/* Pointer to the entry of PFILE's context array selected by cur_context.  */
#define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)

/* Set D's flags to the PREV_WHITE, BOL and PASTE_LEFT bits of S's flags;
   if BOL ends up set, copy S's column and line to D as well.  */
#define ASSIGN_FLAGS_AND_POS(d, s) \
  do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
      if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
  } while (0)

/* f is flags, just consisting of PREV_WHITE | BOL.  Replace those two
   bits of D's flags with F; if F has BOL, copy S's column and line.  */
#define MODIFY_FLAGS_AND_POS(d, s, f) \
  do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
      if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
  } while (0)
210
f617b8e2 211#define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
5d7ee2fa
NB
212#define I(e, s) {SPELL_IDENT, s},
213#define S(e, s) {SPELL_STRING, s},
b8f41010
NB
214#define C(e, s) {SPELL_CHAR, s},
215#define N(e, s) {SPELL_NONE, s},
b8f41010 216
041c3194
ZW
217const struct token_spelling
218token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
219
220#undef T
221#undef I
222#undef S
223#undef C
224#undef N
225
226/* For debugging: the internal names of the tokens. */
b449f23a
KG
227#define T(e, s) U STRINGX(e),
228#define I(e, s) U STRINGX(e),
229#define S(e, s) U STRINGX(e),
230#define C(e, s) U STRINGX(e),
231#define N(e, s) U STRINGX(e),
041c3194 232
cf00a885 233const U_CHAR *const token_names[N_TTYPES] = { TTYPE_TABLE };
b8f41010
NB
234
235#undef T
5d7ee2fa
NB
236#undef I
237#undef S
b8f41010
NB
238#undef C
239#undef N
b8f41010 240
041c3194
ZW
241/* The following table is used by trigraph_ok/trigraph_replace. If we
242 have designated initializers, it can be constant data; otherwise,
243 it is set up at runtime by _cpp_init_input_buffer. */
244
245#if (GCC_VERSION >= 2007)
246#define init_trigraph_map() /* nothing */
247#define TRIGRAPH_MAP \
248__extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
249#define END };
250#define s(p, v) [p] = v,
251#else
252#define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
253 static void init_trigraph_map PARAMS ((void)) { \
254 unsigned char *x = trigraph_map;
255#define END }
256#define s(p, v) x[p] = v;
257#endif
258
259TRIGRAPH_MAP
260 s('=', '#') s(')', ']') s('!', '|')
261 s('(', '[') s('\'', '^') s('>', '}')
262 s('/', '\\') s('<', '{') s('-', '~')
263END
264
265#undef TRIGRAPH_MAP
266#undef END
267#undef s
268
45b966db
ZW
269/* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
270
271void
272_cpp_grow_token_buffer (pfile, n)
273 cpp_reader *pfile;
274 long n;
275{
276 long old_written = CPP_WRITTEN (pfile);
277 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
278 pfile->token_buffer = (U_CHAR *)
279 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
280 CPP_SET_WRITTEN (pfile, old_written);
281}
282
f2d5f0cc
ZW
283/* Deal with the annoying semantics of fwrite. */
284static void
285safe_fwrite (pfile, buf, len, fp)
286 cpp_reader *pfile;
287 const U_CHAR *buf;
288 size_t len;
289 FILE *fp;
290{
291 size_t count;
45b966db 292
f2d5f0cc
ZW
293 while (len)
294 {
295 count = fwrite (buf, 1, len, fp);
296 if (count == 0)
297 goto error;
298 len -= count;
299 buf += count;
300 }
301 return;
302
303 error:
304 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
305}
306
307/* Notify the compiler proper that the current line number has jumped,
308 or the current file name has changed. */
309
310static void
1368ee70 311output_line_command (pfile, print, line)
45b966db 312 cpp_reader *pfile;
f2d5f0cc 313 cpp_printer *print;
1368ee70 314 unsigned int line;
45b966db 315{
041c3194 316 cpp_buffer *ip = CPP_BUFFER (pfile);
f2d5f0cc
ZW
317 enum { same = 0, enter, leave, rname } change;
318 static const char * const codes[] = { "", " 1", " 2", "" };
319
041c3194
ZW
320 if (line == 0)
321 return;
322
323 /* End the previous line of text. */
324 if (pfile->need_newline)
325 putc ('\n', print->outf);
326 pfile->need_newline = 0;
327
f2d5f0cc
ZW
328 if (CPP_OPTION (pfile, no_line_commands))
329 return;
330
041c3194
ZW
331 /* If ip is null, we've been called from cpp_finish, and they just
332 needed the final flush and trailing newline. */
333 if (!ip)
334 return;
335
fb753f88 336 if (pfile->include_depth == print->last_id)
54bef41d 337 {
fb753f88
JJ
338 /* Determine whether the current filename has changed, and if so,
339 how. 'nominal_fname' values are unique, so they can be compared
340 by comparing pointers. */
341 if (ip->nominal_fname == print->last_fname)
342 change = same;
343 else
0e500c78 344 change = rname;
fb753f88
JJ
345 }
346 else
347 {
348 if (pfile->include_depth > print->last_id)
349 change = enter;
f2d5f0cc 350 else
fb753f88
JJ
351 change = leave;
352 print->last_id = pfile->include_depth;
45b966db 353 }
fb753f88
JJ
354 print->last_fname = ip->nominal_fname;
355
f2d5f0cc
ZW
356 /* If the current file has not changed, we can output a few newlines
357 instead if we want to increase the line number by a small amount.
358 We cannot do this if print->lineno is zero, because that means we
359 haven't output any line commands yet. (The very first line
360 command output is a `same_file' command.) */
041c3194 361 if (change == same && print->lineno > 0
f2d5f0cc 362 && line >= print->lineno && line < print->lineno + 8)
45b966db 363 {
f2d5f0cc 364 while (line > print->lineno)
45b966db 365 {
f2d5f0cc
ZW
366 putc ('\n', print->outf);
367 print->lineno++;
45b966db 368 }
f2d5f0cc 369 return;
45b966db 370 }
f2d5f0cc
ZW
371
372#ifndef NO_IMPLICIT_EXTERN_C
373 if (CPP_OPTION (pfile, cplusplus))
374 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
375 codes[change],
c31a6508
ZW
376 ip->inc->sysp ? " 3" : "",
377 (ip->inc->sysp == 2) ? " 4" : "");
f2d5f0cc
ZW
378 else
379#endif
380 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
381 codes[change],
c31a6508 382 ip->inc->sysp ? " 3" : "");
f2d5f0cc
ZW
383 print->lineno = line;
384}
385
386/* Write the contents of the token_buffer to the output stream, and
387 clear the token_buffer. Also handles generating line commands and
388 keeping track of file transitions. */
389
390void
041c3194 391cpp_output_tokens (pfile, print, line)
f2d5f0cc
ZW
392 cpp_reader *pfile;
393 cpp_printer *print;
041c3194 394 unsigned int line;
f2d5f0cc 395{
f6fab919
ZW
396 if (CPP_WRITTEN (pfile) - print->written)
397 {
f6fab919
ZW
398 safe_fwrite (pfile, pfile->token_buffer,
399 CPP_WRITTEN (pfile) - print->written, print->outf);
041c3194
ZW
400 pfile->need_newline = 1;
401 if (print->lineno)
402 print->lineno++;
45b966db 403
041c3194 404 CPP_SET_WRITTEN (pfile, print->written);
f2d5f0cc 405 }
041c3194 406 output_line_command (pfile, print, line);
45b966db
ZW
407}
408
c56c2073 409/* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
f2d5f0cc
ZW
410
411void
412cpp_scan_buffer_nooutput (pfile)
413 cpp_reader *pfile;
414{
f2d5f0cc 415 unsigned int old_written = CPP_WRITTEN (pfile);
041c3194
ZW
416 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
417
f2d5f0cc
ZW
418 for (;;)
419 {
041c3194
ZW
420 /* In no-output mode, we can ignore everything but directives. */
421 const cpp_token *token = cpp_get_token (pfile);
422 if (token->type == CPP_EOF)
423 {
424 cpp_pop_buffer (pfile);
425 if (CPP_BUFFER (pfile) == stop)
426 break;
427 }
428 _cpp_skip_rest_of_line (pfile);
f2d5f0cc
ZW
429 }
430 CPP_SET_WRITTEN (pfile, old_written);
431}
432
c56c2073 433/* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
f2d5f0cc
ZW
434void
435cpp_scan_buffer (pfile, print)
436 cpp_reader *pfile;
437 cpp_printer *print;
438{
c56c2073 439 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
041c3194 440 const cpp_token *token, *prev = 0;
f2d5f0cc
ZW
441
442 for (;;)
443 {
444 token = cpp_get_token (pfile);
041c3194 445 if (token->type == CPP_EOF)
f2d5f0cc 446 {
041c3194
ZW
447 cpp_pop_buffer (pfile);
448 if (CPP_BUFFER (pfile) == stop)
f2d5f0cc 449 return;
041c3194
ZW
450 cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
451 prev = 0;
452 continue;
453 }
454
455 if (token->flags & BOL)
456 {
457 cpp_output_tokens (pfile, print, pfile->token_list.line);
458 prev = 0;
f2d5f0cc 459 }
041c3194
ZW
460
461 output_token (pfile, token, prev);
462 prev = token;
f2d5f0cc
ZW
463 }
464}
465
f9a0e96c
ZW
466/* Scan a single line of the input into the token_buffer. */
467void
468cpp_scan_line (pfile)
469 cpp_reader *pfile;
470{
471 const cpp_token *token, *prev = 0;
472
473 do
474 {
475 token = cpp_get_token (pfile);
476 if (token->type == CPP_EOF)
477 {
478 cpp_pop_buffer (pfile);
479 break;
480 }
481
482 output_token (pfile, token, prev);
483 prev = token;
484 }
485 while (pfile->cur_context > 0
486 || pfile->contexts[0].posn < pfile->contexts[0].count);
487}
488
041c3194
ZW
489/* Helper routine used by parse_include, which can't see spell_token.
490 Reinterpret the current line as an h-char-sequence (< ... >); we are
491 looking at the first token after the <. */
492const cpp_token *
493_cpp_glue_header_name (pfile)
45b966db
ZW
494 cpp_reader *pfile;
495{
041c3194
ZW
496 unsigned int written = CPP_WRITTEN (pfile);
497 const cpp_token *t;
498 cpp_token *hdr;
499 U_CHAR *buf;
500 size_t len;
501
502 for (;;)
503 {
417f3e3a 504 t = _cpp_get_token (pfile);
041c3194
ZW
505 if (t->type == CPP_GREATER || t->type == CPP_EOF)
506 break;
507
508 CPP_RESERVE (pfile, TOKEN_LEN (t));
509 if (t->flags & PREV_WHITE)
510 CPP_PUTC_Q (pfile, ' ');
511 pfile->limit = spell_token (pfile, t, pfile->limit);
512 }
513
514 if (t->type == CPP_EOF)
515 cpp_error (pfile, "missing terminating > character");
45b966db 516
041c3194
ZW
517 len = CPP_WRITTEN (pfile) - written;
518 buf = xmalloc (len);
519 memcpy (buf, pfile->token_buffer + written, len);
520 CPP_SET_WRITTEN (pfile, written);
521
522 hdr = get_temp_token (pfile);
523 hdr->type = CPP_HEADER_NAME;
524 hdr->flags = 0;
bfb9dc7f
ZW
525 hdr->val.str.text = buf;
526 hdr->val.str.len = len;
041c3194 527 return hdr;
45b966db
ZW
528}
529
1368ee70
ZW
530/* Token-buffer helper functions. */
531
d1d9a6bd
NB
532/* Expand a token list's string space. It is *vital* that
533 list->tokens_used is correct, to get pointer fix-up right. */
041c3194
ZW
534void
535_cpp_expand_name_space (list, len)
1368ee70 536 cpp_toklist *list;
c5a04734
ZW
537 unsigned int len;
538{
f617b8e2 539 const U_CHAR *old_namebuf;
f617b8e2
NB
540
541 old_namebuf = list->namebuf;
c5a04734
ZW
542 list->name_cap += len;
543 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
f617b8e2
NB
544
545 /* Fix up token text pointers. */
79f50f2a 546 if (list->namebuf != old_namebuf)
f617b8e2
NB
547 {
548 unsigned int i;
549
550 for (i = 0; i < list->tokens_used; i++)
bfb9dc7f
ZW
551 if (token_spellings[list->tokens[i].type].type == SPELL_STRING)
552 list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
f617b8e2 553 }
1368ee70
ZW
554}
555
041c3194
ZW
556/* If there is not enough room for LEN more characters, expand the
557 list by just enough to have room for LEN characters. */
558void
559_cpp_reserve_name_space (list, len)
560 cpp_toklist *list;
561 unsigned int len;
562{
563 unsigned int room = list->name_cap - list->name_used;
564
565 if (room < len)
566 _cpp_expand_name_space (list, len - room);
567}
568
1368ee70 569/* Expand the number of tokens in a list. */
d1d9a6bd
NB
570void
571_cpp_expand_token_space (list, count)
1368ee70 572 cpp_toklist *list;
d1d9a6bd 573 unsigned int count;
1368ee70 574{
d1d9a6bd
NB
575 unsigned int n;
576
577 list->tokens_cap += count;
578 n = list->tokens_cap;
15dad1d9 579 if (list->flags & LIST_OFFSET)
d1d9a6bd 580 list->tokens--, n++;
1368ee70 581 list->tokens = (cpp_token *)
d1d9a6bd 582 xrealloc (list->tokens, n * sizeof (cpp_token));
15dad1d9
ZW
583 if (list->flags & LIST_OFFSET)
584 list->tokens++; /* Skip the dummy. */
1368ee70
ZW
585}
586
d1d9a6bd
NB
587/* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
588 an extra token in front of the token list, as this allows the lexer
589 to always peek at the previous token without worrying about
590 underflowing the list, and some initial space. Otherwise, no
591 token- or name-space is allocated, and there is no dummy token. */
15dad1d9 592void
d1d9a6bd 593_cpp_init_toklist (list, flags)
1368ee70 594 cpp_toklist *list;
d1d9a6bd 595 int flags;
1368ee70 596{
d1d9a6bd
NB
597 if (flags == NO_DUMMY_TOKEN)
598 {
599 list->tokens_cap = 0;
041c3194 600 list->tokens = 0;
d1d9a6bd 601 list->name_cap = 0;
041c3194 602 list->namebuf = 0;
d1d9a6bd
NB
603 list->flags = 0;
604 }
605 else
606 {
607 /* Initialize token space. Put a dummy token before the start
608 that will fail matches. */
609 list->tokens_cap = 256; /* 4K's worth. */
610 list->tokens = (cpp_token *)
611 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
612 list->tokens[0].type = CPP_EOF;
613 list->tokens++;
614
615 /* Initialize name space. */
616 list->name_cap = 1024;
041c3194 617 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
d1d9a6bd
NB
618 list->flags = LIST_OFFSET;
619 }
15dad1d9 620
15dad1d9
ZW
621 _cpp_clear_toklist (list);
622}
1368ee70 623
15dad1d9
ZW
624/* Clear a token list. */
625void
626_cpp_clear_toklist (list)
627 cpp_toklist *list;
628{
c5a04734
ZW
629 list->tokens_used = 0;
630 list->name_used = 0;
041c3194
ZW
631 list->directive = 0;
632 list->paramc = 0;
633 list->params_len = 0;
15dad1d9
ZW
634 list->flags &= LIST_OFFSET; /* clear all but that one */
635}
636
637/* Free a token list. Does not free the list itself, which may be
638 embedded in a larger structure. */
639void
640_cpp_free_toklist (list)
041c3194 641 const cpp_toklist *list;
15dad1d9 642{
15dad1d9
ZW
643 if (list->flags & LIST_OFFSET)
644 free (list->tokens - 1); /* Backup over dummy token. */
645 else
646 free (list->tokens);
647 free (list->namebuf);
1368ee70
ZW
648}
649
15dad1d9
ZW
650/* Compare two tokens. */
651int
652_cpp_equiv_tokens (a, b)
653 const cpp_token *a, *b;
654{
041c3194
ZW
655 if (a->type == b->type && a->flags == b->flags)
656 switch (token_spellings[a->type].type)
657 {
658 default: /* Keep compiler happy. */
659 case SPELL_OPERATOR:
660 return 1;
661 case SPELL_CHAR:
662 case SPELL_NONE:
663 return a->val.aux == b->val.aux; /* arg_no or character. */
664 case SPELL_IDENT:
bfb9dc7f 665 return a->val.node == b->val.node;
041c3194 666 case SPELL_STRING:
bfb9dc7f
ZW
667 return (a->val.str.len == b->val.str.len
668 && !memcmp (a->val.str.text, b->val.str.text,
669 a->val.str.len));
041c3194 670 }
15dad1d9 671
041c3194 672 return 0;
15dad1d9
ZW
673}
674
675/* Compare two token lists. */
676int
677_cpp_equiv_toklists (a, b)
678 const cpp_toklist *a, *b;
679{
680 unsigned int i;
681
041c3194
ZW
682 if (a->tokens_used != b->tokens_used
683 || a->flags != b->flags
684 || a->paramc != b->paramc)
15dad1d9
ZW
685 return 0;
686
687 for (i = 0; i < a->tokens_used; i++)
688 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
689 return 0;
690 return 1;
691}
692
041c3194 693/* Utility routine:
9e62c811 694
bfb9dc7f
ZW
695 Compares, the token TOKEN to the NUL-terminated string STRING.
696 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
15dad1d9 697
041c3194 698int
bfb9dc7f
ZW
699cpp_ideq (token, string)
700 const cpp_token *token;
041c3194
ZW
701 const char *string;
702{
bfb9dc7f 703 if (token->type != CPP_NAME)
041c3194 704 return 0;
bfb9dc7f
ZW
705
706 return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
15dad1d9 707}
1368ee70 708
041c3194 709/* Lexing algorithm.
45b966db 710
041c3194
ZW
711 The original lexer in cpplib was made up of two passes: a first pass
712 that replaced trigraphs and deleted escaped newlines, and a second
713 pass that tokenized the result of the first pass. Tokenisation was
714 performed by peeking at the next character in the input stream. For
715 example, if the input stream contained "!=", the handler for the !
716 character would peek at the next character, and if it were a '='
717 would skip over it, and return a "!=" token, otherwise it would
718 return just the "!" token.
61474454 719
041c3194
ZW
720 To implement a single-pass lexer, this peeking ahead is unworkable.
721 An arbitrary number of escaped newlines, and trigraphs (in particular
722 ??/ which translates to the escape \), could separate the '!' and '='
723 in the input stream, yet the next token is still a "!=".
61474454 724
041c3194
ZW
725 Suppose instead that we lex by one logical line at a time, producing
726 a token list or stack for each logical line, and when seeing the '!'
727 push a CPP_NOT token on the list. Then if the '!' is part of a
728 longer token ("!=") we know we must see the remainder of the token by
729 the time we reach the end of the logical line. Thus we can have the
730 '=' handler look at the previous token (at the end of the list / top
731 of the stack) and see if it is a "!" token, and if so, instead of
732 pushing a "=" token revise the existing token to be a "!=" token.
61474454 733
041c3194
ZW
734 This works in the presence of escaped newlines, because the '\' would
735 have been pushed on the top of the stack as a CPP_BACKSLASH. The
736 newline ('\n' or '\r') handler looks at the token at the top of the
737 stack to see if it is a CPP_BACKSLASH, and if so discards both.
738 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
739 the '=' handler would never see any intervening escaped newlines.
45b966db 740
041c3194
ZW
741 To make trigraphs work in this context, as in precedence trigraphs
742 are highest and converted before anything else, the '?' handler does
743 lookahead to see if it is a trigraph, and if so skips the trigraph
744 and pushes the token it represents onto the top of the stack. This
745 also works in the particular case of a CPP_BACKSLASH trigraph.
45b966db 746
041c3194
ZW
747 To the preprocessor, whitespace is only significant to the point of
748 knowing whether whitespace precedes a particular token. For example,
749 the '=' handler needs to know whether there was whitespace between it
750 and a "!" token on the top of the stack, to make the token conversion
751 decision correctly. So each token has a PREV_WHITE flag to
752 indicate this - the standard permits consecutive whitespace to be
753 regarded as a single space. The compiler front ends are not
754 interested in whitespace at all; they just require a token stream.
755 Another place where whitespace is significant to the preprocessor is
756 a #define statement - if there is whitespace between the macro name
757 and an initial "(" token the macro is "object-like", otherwise it is
758 a function-like macro that takes arguments.
759
760 However, all is not rosy. Parsing of identifiers, numbers, comments
761 and strings becomes trickier because of the possibility of raw
762 trigraphs and escaped newlines in the input stream.
763
764 The trigraphs are three consecutive characters beginning with two
765 question marks. A question mark is not valid as part of a number or
766 identifier, so parsing of a number or identifier terminates normally
767 upon reaching it, returning to the mainloop which handles the
768 trigraph just like it would in any other position. Similarly for the
769 backslash of a backslash-newline combination. So we just need the
770 escaped-newline dropper in the mainloop to check if the token on the
771 top of the stack after dropping the escaped newline is a number or
772 identifier, and if so to continue the processing it as if nothing had
773 happened.
774
775 For strings, we replace trigraphs whenever we reach a quote or
776 newline, because there might be a backslash trigraph escaping them.
777 We need to be careful that we start trigraph replacing from where we
778 left off previously, because it is possible for a first scan to leave
779 "fake" trigraphs that a second scan would pick up as real (e.g. the
780 sequence "????/\n=" would find a fake ??= trigraph after removing the
781 escaped newline.)
782
783 For line comments, on reaching a newline we scan the previous
784 character(s) to see if it escaped, and continue if it is. Block
785 comments ignore everything and just focus on finding the comment
786 termination mark. The only difficult thing, and it is surprisingly
787 tricky, is checking if an asterisk precedes the final slash since
788 they could be separated by escaped newlines. If the preprocessor is
789 invoked with the output comments option, we don't bother removing
790 escaped newlines and replacing trigraphs for output.
791
792 Finally, numbers can begin with a period, which is pushed initially
793 as a CPP_DOT token in its own right. The digit handler checks if the
794 previous token was a CPP_DOT not separated by whitespace, and if so
795 pops it off the stack and pushes a period into the number's buffer
796 before calling the number parser.
45b966db 797
041c3194
ZW
798*/
799
/* The spellings of the digraphs, as unsigned character strings.  */
static const unsigned char *digraph_spellings [] =
  {
    U"%:", U"%:%:", U"<:",
    U":>", U"<%", U"%>"
  };
45b966db 802
041c3194
ZW
803/* Call when a trigraph is encountered. It warns if necessary, and
804 returns true if the trigraph should be honoured. END is the third
805 character of a trigraph in the input stream. */
45b966db 806static int
041c3194 807trigraph_ok (pfile, end)
45b966db 808 cpp_reader *pfile;
041c3194 809 const unsigned char *end;
45b966db 810{
041c3194
ZW
811 int accept = CPP_OPTION (pfile, trigraphs);
812
813 if (CPP_OPTION (pfile, warn_trigraphs))
45b966db 814 {
041c3194
ZW
815 unsigned int col = end - 1 - pfile->buffer->line_base;
816 if (accept)
817 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
818 "trigraph ??%c converted to %c",
819 (int) *end, (int) trigraph_map[*end]);
45b966db 820 else
041c3194
ZW
821 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
822 "trigraph ??%c ignored", (int) *end);
45b966db 823 }
041c3194 824 return accept;
45b966db
ZW
825}
826
041c3194
ZW
827/* Scan a string for trigraphs, warning or replacing them inline as
828 appropriate. When parsing a string, we must call this routine
829 before processing a newline character (if trigraphs are enabled),
830 since the newline might be escaped by a preceding backslash
831 trigraph sequence. Returns a pointer to the end of the name after
832 replacement. */
833
834static unsigned char *
835trigraph_replace (pfile, src, limit)
45b966db 836 cpp_reader *pfile;
041c3194
ZW
837 unsigned char *src;
838 unsigned char *limit;
45b966db 839{
041c3194
ZW
840 unsigned char *dest;
841
842 /* Starting with src[1], find two consecutive '?'. The case of no
843 trigraphs is streamlined. */
844
043afb2a 845 for (src++; src + 1 < limit; src += 2)
041c3194
ZW
846 {
847 if (src[0] != '?')
848 continue;
849
850 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
851 if (src[-1] == '?')
852 src--;
853 else if (src + 2 == limit || src[1] != '?')
854 continue;
45b966db 855
041c3194
ZW
856 /* Check if it really is a trigraph. */
857 if (trigraph_map[src[2]] == 0)
858 continue;
45b966db 859
041c3194
ZW
860 dest = src;
861 goto trigraph_found;
862 }
863 return limit;
45b966db 864
041c3194
ZW
865 /* Now we have a trigraph, we need to scan the remaining buffer, and
866 copy-shifting its contents left if replacement is enabled. */
867 for (; src + 2 < limit; dest++, src++)
868 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
869 {
870 trigraph_found:
871 src += 2;
872 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
873 *dest = trigraph_map[*src];
874 }
875
876 /* Copy remaining (at most 2) characters. */
877 while (src < limit)
878 *dest++ = *src++;
879 return dest;
45b966db
ZW
880}
881
041c3194
ZW
882/* If CUR is a backslash or the end of a trigraphed backslash, return
883 a pointer to its beginning, otherwise NULL. We don't read beyond
884 the buffer start, because there is the start of the comment in the
885 buffer. */
886static const unsigned char *
887backslash_start (pfile, cur)
64aaf407 888 cpp_reader *pfile;
041c3194 889 const unsigned char *cur;
64aaf407 890{
041c3194
ZW
891 if (cur[0] == '\\')
892 return cur;
893 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
894 && trigraph_ok (pfile, cur))
895 return cur - 2;
896 return 0;
64aaf407
NB
897}
898
041c3194
ZW
899/* Skip a C-style block comment. This is probably the trickiest
900 handler. We find the end of the comment by seeing if an asterisk
901 is before every '/' we encounter. The nasty complication is that a
902 previous asterisk may be separated by one or more escaped newlines.
903 Returns non-zero if comment terminated by EOF, zero otherwise. */
904static int
905skip_block_comment (pfile)
45b966db
ZW
906 cpp_reader *pfile;
907{
041c3194
ZW
908 cpp_buffer *buffer = pfile->buffer;
909 const unsigned char *char_after_star = 0;
52fadca8 910 const unsigned char *cur = buffer->cur;
041c3194 911
041c3194 912 for (; cur < buffer->rlimit; )
45b966db 913 {
041c3194
ZW
914 unsigned char c = *cur++;
915
916 /* People like decorating comments with '*', so check for
917 '/' instead for efficiency. */
918 if (c == '/')
45b966db 919 {
52fadca8
NB
920 /* Don't view / then * then / as finishing the comment. */
921 if ((cur[-2] == '*' && cur - 1 > buffer->cur)
922 || cur - 1 == char_after_star)
923 {
924 buffer->cur = cur;
925 return 0;
926 }
041c3194
ZW
927
928 /* Warn about potential nested comments, but not when
929 the final character inside the comment is a '/'.
930 Don't bother to get it right across escaped newlines. */
931 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
932 && cur[0] == '*' && cur[1] != '/')
45b966db 933 {
041c3194
ZW
934 buffer->cur = cur;
935 cpp_warning (pfile, "'/*' within comment");
45b966db 936 }
45b966db 937 }
91fcd158 938 else if (is_vspace (c))
45b966db 939 {
041c3194
ZW
940 const unsigned char* bslash = backslash_start (pfile, cur - 2);
941
942 handle_newline (cur, buffer->rlimit, c);
943 /* Work correctly if there is an asterisk before an
944 arbirtrarily long sequence of escaped newlines. */
945 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
946 char_after_star = cur;
947 else
948 char_after_star = 0;
45b966db 949 }
52fadca8
NB
950 else if (c == '\t')
951 adjust_column (pfile, cur - 1);
45b966db 952 }
041c3194 953
041c3194 954 buffer->cur = cur;
52fadca8 955 return 1;
45b966db
ZW
956}
957
f9a0e96c
ZW
958/* Skip a C++ line comment. Handles escaped newlines. Returns
959 non-zero if a multiline comment. */
041c3194
ZW
960static int
961skip_line_comment (pfile)
45b966db
ZW
962 cpp_reader *pfile;
963{
041c3194
ZW
964 cpp_buffer *buffer = pfile->buffer;
965 register const unsigned char *cur = buffer->cur;
966 int multiline = 0;
967
968 for (; cur < buffer->rlimit; )
969 {
970 unsigned char c = *cur++;
971
91fcd158 972 if (is_vspace (c))
45b966db 973 {
041c3194
ZW
974 /* Check for a (trigaph?) backslash escaping the newline. */
975 if (!backslash_start (pfile, cur - 2))
976 goto out;
977 multiline = 1;
978 handle_newline (cur, buffer->rlimit, c);
979 }
980 }
981 cur++;
45b966db 982
041c3194
ZW
983 out:
984 buffer->cur = cur - 1; /* Leave newline for caller. */
985 return multiline;
986}
45b966db 987
52fadca8
NB
988/* TAB points to a \t character. Update col_adjust so we track the
989 column correctly. */
990static void
991adjust_column (pfile, tab)
992 cpp_reader *pfile;
993 const U_CHAR *tab;
994{
995 /* Zero-based column. */
996 unsigned int col = CPP_BUF_COLUMN (pfile->buffer, tab);
997
998 /* Round it up to multiple of the tabstop, but subtract 1 since the
999 tab itself occupies a character position. */
1000 pfile->col_adjust += (CPP_OPTION (pfile, tabstop)
1001 - col % CPP_OPTION (pfile, tabstop)) - 1;
1002}
1003
041c3194
ZW
1004/* Skips whitespace, stopping at next non-whitespace character.
1005 Adjusts pfile->col_adjust to account for tabs. This enables tokens
1006 to be assigned the correct column. */
1007static void
1008skip_whitespace (pfile, in_directive)
1009 cpp_reader *pfile;
1010 int in_directive;
1011{
1012 cpp_buffer *buffer = pfile->buffer;
91fcd158 1013 unsigned short warned = 0;
45b966db 1014
91fcd158
NB
1015 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
1016 while (buffer->cur < buffer->rlimit)
041c3194 1017 {
91fcd158 1018 unsigned char c = *buffer->cur;
45b966db 1019
91fcd158
NB
1020 if (!is_nvspace (c))
1021 break;
1022
1023 buffer->cur++;
1024 /* Horizontal space always OK. */
1025 if (c == ' ')
1026 continue;
1027 else if (c == '\t')
52fadca8 1028 adjust_column (pfile, buffer->cur - 1);
91fcd158
NB
1029 /* Must be \f \v or \0. */
1030 else if (c == '\0')
041c3194 1031 {
91fcd158
NB
1032 if (!warned)
1033 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
1034 CPP_BUF_COL (buffer),
1035 "embedded null character ignored");
1036 warned = 1;
45b966db 1037 }
041c3194 1038 else if (in_directive && CPP_PEDANTIC (pfile))
91fcd158
NB
1039 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1040 CPP_BUF_COL (buffer),
1041 "%s in preprocessing directive",
1042 c == '\f' ? "form feed" : "vertical tab");
45b966db 1043 }
041c3194 1044}
45b966db 1045
041c3194 1046/* Parse (append) an identifier. */
417f3e3a 1047static const U_CHAR *
bfb9dc7f 1048parse_name (pfile, tok, cur, rlimit)
45b966db 1049 cpp_reader *pfile;
bfb9dc7f
ZW
1050 cpp_token *tok;
1051 const U_CHAR *cur, *rlimit;
45b966db 1052{
bfb9dc7f
ZW
1053 const U_CHAR *name = cur;
1054 unsigned int len;
041c3194 1055
bfb9dc7f 1056 while (cur < rlimit)
041c3194 1057 {
bfb9dc7f
ZW
1058 if (! is_idchar (*cur))
1059 break;
e5ec2402
ZW
1060 /* $ is not a legal identifier character in the standard, but is
1061 commonly accepted as an extension. Don't warn about it in
1062 skipped conditional blocks. */
bfb9dc7f 1063 if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
041c3194 1064 {
bfb9dc7f 1065 CPP_BUFFER (pfile)->cur = cur;
041c3194
ZW
1066 cpp_pedwarn (pfile, "'$' character in identifier");
1067 }
bfb9dc7f 1068 cur++;
041c3194 1069 }
bfb9dc7f 1070 len = cur - name;
45b966db 1071
bfb9dc7f 1072 if (tok->val.node)
041c3194 1073 {
bfb9dc7f
ZW
1074 unsigned int oldlen = tok->val.node->length;
1075 U_CHAR *newname = alloca (oldlen + len);
1076 memcpy (newname, tok->val.node->name, oldlen);
1077 memcpy (newname + oldlen, name, len);
1078 len += oldlen;
1079 name = newname;
041c3194
ZW
1080 }
1081
bfb9dc7f
ZW
1082 tok->val.node = cpp_lookup (pfile, name, len);
1083 return cur;
45b966db
ZW
1084}
1085
041c3194 1086/* Parse (append) a number. */
45b966db 1087static void
041c3194 1088parse_number (pfile, list, name)
45b966db 1089 cpp_reader *pfile;
041c3194 1090 cpp_toklist *list;
bfb9dc7f 1091 cpp_string *name;
45b966db 1092{
041c3194
ZW
1093 const unsigned char *name_limit;
1094 unsigned char *namebuf;
1095 cpp_buffer *buffer = pfile->buffer;
1096 register const unsigned char *cur = buffer->cur;
45b966db 1097
041c3194
ZW
1098 expanded:
1099 name_limit = list->namebuf + list->name_cap;
1100 namebuf = list->namebuf + list->name_used;
45b966db 1101
041c3194
ZW
1102 for (; cur < buffer->rlimit && namebuf < name_limit; )
1103 {
1104 unsigned char c = *namebuf = *cur; /* Copy a single char. */
45b966db 1105
041c3194
ZW
1106 /* Perhaps we should accept '$' here if we accept it for
1107 identifiers. We know namebuf[-1] is safe, because for c to
1108 be a sign we must have pushed at least one character. */
1109 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1110 goto out;
45b966db 1111
041c3194
ZW
1112 namebuf++;
1113 cur++;
45b966db 1114 }
64aaf407 1115
041c3194
ZW
1116 /* Run out of name space? */
1117 if (cur < buffer->rlimit)
1118 {
1119 list->name_used = namebuf - list->namebuf;
1120 auto_expand_name_space (list);
1121 goto expanded;
1122 }
1123
64aaf407 1124 out:
041c3194
ZW
1125 buffer->cur = cur;
1126 name->len = namebuf - name->text;
1127 list->name_used = namebuf - list->namebuf;
45b966db
ZW
1128}
1129
041c3194 1130/* Places a string terminated by an unescaped TERMINATOR into a
bfb9dc7f 1131 cpp_string, which should be expandable and thus at the top of the
041c3194
ZW
1132 list's stack. Handles embedded trigraphs, if necessary, and
1133 escaped newlines.
45b966db 1134
041c3194
ZW
1135 Can be used for character constants (terminator = '\''), string
1136 constants ('"') and angled headers ('>'). Multi-line strings are
1137 allowed, except for within directives. */
45b966db 1138
041c3194
ZW
1139static void
1140parse_string (pfile, list, token, terminator)
45b966db 1141 cpp_reader *pfile;
041c3194
ZW
1142 cpp_toklist *list;
1143 cpp_token *token;
1144 unsigned int terminator;
45b966db 1145{
041c3194 1146 cpp_buffer *buffer = pfile->buffer;
bfb9dc7f 1147 cpp_string *name = &token->val.str;
041c3194
ZW
1148 register const unsigned char *cur = buffer->cur;
1149 const unsigned char *name_limit;
1150 unsigned char *namebuf;
1151 unsigned int null_count = 0;
1152 unsigned int trigraphed = list->name_used;
45b966db 1153
041c3194
ZW
1154 expanded:
1155 name_limit = list->namebuf + list->name_cap;
1156 namebuf = list->namebuf + list->name_used;
f2d5f0cc 1157
041c3194 1158 for (; cur < buffer->rlimit && namebuf < name_limit; )
45b966db 1159 {
041c3194 1160 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
45b966db 1161
041c3194
ZW
1162 if (c == '\0')
1163 null_count++;
91fcd158 1164 else if (c == terminator || is_vspace (c))
45b966db 1165 {
041c3194
ZW
1166 /* Needed for trigraph_replace and multiline string warning. */
1167 buffer->cur = cur;
5eec0563 1168
041c3194
ZW
1169 /* Scan for trigraphs before checking if backslash-escaped. */
1170 if ((CPP_OPTION (pfile, trigraphs)
1171 || CPP_OPTION (pfile, warn_trigraphs))
1172 && namebuf - (list->namebuf + trigraphed) >= 3)
45b966db 1173 {
041c3194
ZW
1174 namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1175 namebuf);
1176 /* The test above guarantees trigraphed will be positive. */
1177 trigraphed = namebuf - list->namebuf - 2;
45b966db 1178 }
45b966db 1179
041c3194 1180 namebuf--; /* Drop the newline / terminator from the name. */
91fcd158 1181 if (is_vspace (c))
45b966db 1182 {
041c3194
ZW
1183 /* Drop a backslash newline, and continue. */
1184 if (namebuf[-1] == '\\')
45b966db 1185 {
041c3194
ZW
1186 handle_newline (cur, buffer->rlimit, c);
1187 namebuf--;
1188 continue;
45b966db 1189 }
45b966db 1190
041c3194 1191 cur--;
45b966db 1192
f9a0e96c
ZW
1193 /* In assembly language, silently terminate strings of
1194 either variety at end of line. This is a kludge
1195 around not knowing where comments are. */
1196 if (CPP_OPTION (pfile, lang_asm))
041c3194 1197 goto out;
64aaf407 1198
f9a0e96c
ZW
1199 /* Character constants and header names may not extend
1200 over multiple lines. In Standard C, neither may
1201 strings. We accept multiline strings as an
041c3194 1202 extension. (Even in directives - otherwise, glibc's
f9a0e96c 1203 longlong.h breaks.) */
041c3194
ZW
1204 if (terminator != '"')
1205 goto unterminated;
1206
1207 cur++; /* Move forwards again. */
45b966db 1208
041c3194
ZW
1209 if (pfile->multiline_string_line == 0)
1210 {
1211 pfile->multiline_string_line = token->line;
1212 pfile->multiline_string_column = token->col;
1213 if (CPP_PEDANTIC (pfile))
1214 cpp_pedwarn (pfile, "multi-line string constant");
1215 }
1216
1217 *namebuf++ = '\n';
1218 handle_newline (cur, buffer->rlimit, c);
45b966db
ZW
1219 }
1220 else
1221 {
041c3194 1222 unsigned char *temp;
45b966db 1223
041c3194
ZW
1224 /* An odd number of consecutive backslashes represents
1225 an escaped terminator. */
1226 temp = namebuf - 1;
1227 while (temp >= name->text && *temp == '\\')
1228 temp--;
45b966db 1229
041c3194
ZW
1230 if ((namebuf - temp) & 1)
1231 goto out;
1232 namebuf++;
1233 }
45b966db 1234 }
ff2b53ef 1235 }
45b966db 1236
041c3194
ZW
1237 /* Run out of name space? */
1238 if (cur < buffer->rlimit)
45b966db 1239 {
041c3194
ZW
1240 list->name_used = namebuf - list->namebuf;
1241 auto_expand_name_space (list);
1242 goto expanded;
1243 }
45b966db 1244
041c3194
ZW
1245 /* We may not have trigraph-replaced the input for this code path,
1246 but as the input is in error by being unterminated we don't
1247 bother. Prevent warnings about no newlines at EOF. */
91fcd158 1248 if (is_vspace (cur[-1]))
041c3194 1249 cur--;
45b966db 1250
041c3194
ZW
1251 unterminated:
1252 cpp_error (pfile, "missing terminating %c character", (int) terminator);
476f2869 1253
041c3194
ZW
1254 if (terminator == '\"' && pfile->multiline_string_line != list->line
1255 && pfile->multiline_string_line != 0)
1256 {
1257 cpp_error_with_line (pfile, pfile->multiline_string_line,
1258 pfile->multiline_string_column,
1259 "possible start of unterminated string literal");
1260 pfile->multiline_string_line = 0;
45b966db 1261 }
041c3194
ZW
1262
1263 out:
1264 buffer->cur = cur;
1265 name->len = namebuf - name->text;
1266 list->name_used = namebuf - list->namebuf;
45b966db 1267
041c3194
ZW
1268 if (null_count > 0)
1269 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1270 : "null character preserved"));
45b966db
ZW
1271}
1272
041c3194 1273/* The character TYPE helps us distinguish comment types: '*' = C
f9a0e96c
ZW
1274 style, '/' = C++ style. For code simplicity, the stored comment
1275 includes the comment start and any terminator. */
041c3194
ZW
1276
1277#define COMMENT_START_LEN 2
9e62c811 1278static void
041c3194
ZW
1279save_comment (list, token, from, len, type)
1280 cpp_toklist *list;
1281 cpp_token *token;
1282 const unsigned char *from;
9e62c811 1283 unsigned int len;
041c3194 1284 unsigned int type;
9e62c811 1285{
041c3194
ZW
1286 unsigned char *buffer;
1287
1288 len += COMMENT_START_LEN;
9e62c811 1289
041c3194
ZW
1290 if (list->name_used + len > list->name_cap)
1291 _cpp_expand_name_space (list, len);
9e62c811 1292
bfb9dc7f 1293 INIT_TOKEN_STR (list, token);
041c3194 1294 token->type = CPP_COMMENT;
bfb9dc7f 1295 token->val.str.len = len;
45b966db 1296
041c3194
ZW
1297 buffer = list->namebuf + list->name_used;
1298 list->name_used += len;
45b966db 1299
041c3194
ZW
1300 /* Copy the comment. */
1301 if (type == '*')
45b966db 1302 {
041c3194
ZW
1303 *buffer++ = '/';
1304 *buffer++ = '*';
45b966db 1305 }
041c3194 1306 else
ea4a453b 1307 {
041c3194
ZW
1308 *buffer++ = type;
1309 *buffer++ = type;
ea4a453b 1310 }
041c3194 1311 memcpy (buffer, from, len - COMMENT_START_LEN);
45b966db
ZW
1312}
1313
041c3194
ZW
1314/*
1315 * The tokenizer's main loop. Returns a token list, representing a
1316 * logical line in the input file. On EOF after some tokens have
1317 * been processed, we return immediately. Then in next call, or if
1318 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1319 * token is placed in the list.
1320 *
1321 * Implementation relies almost entirely on lookback, rather than
1322 * looking forwards. This means that tokenization requires just
1323 * a single pass of the file, even in the presence of trigraphs and
1324 * escaped newlines, providing significant performance benefits.
1325 * Trigraph overhead is negligible if they are disabled, and low
1326 * even when enabled.
1327 */
1328
91fcd158 1329#define KNOWN_DIRECTIVE() (list->directive != 0)
041c3194
ZW
1330#define MIGHT_BE_DIRECTIVE() \
1331(cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
45b966db 1332
041c3194
ZW
1333static void
1334lex_line (pfile, list)
45b966db 1335 cpp_reader *pfile;
041c3194 1336 cpp_toklist *list;
45b966db 1337{
041c3194
ZW
1338 cpp_token *cur_token, *token_limit, *first;
1339 cpp_buffer *buffer = pfile->buffer;
1340 const unsigned char *cur = buffer->cur;
1341 unsigned char flags = 0;
1342 unsigned int first_token = list->tokens_used;
45b966db 1343
041c3194
ZW
1344 if (!(list->flags & LIST_OFFSET))
1345 (abort) ();
1346
1347 list->file = buffer->nominal_fname;
1348 list->line = CPP_BUF_LINE (buffer);
1349 pfile->col_adjust = 0;
1350 pfile->in_lex_line = 1;
1351 if (cur == buffer->buf)
1352 list->flags |= BEG_OF_FILE;
1353
1354 expanded:
1355 token_limit = list->tokens + list->tokens_cap;
1356 cur_token = list->tokens + list->tokens_used;
45b966db 1357
041c3194 1358 for (; cur < buffer->rlimit && cur_token < token_limit;)
45b966db 1359 {
041c3194 1360 unsigned char c;
45b966db 1361
91fcd158
NB
1362 /* Optimize non-vertical whitespace skipping; most tokens are
1363 probably separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
1364 c = *cur;
1365 if (is_nvspace (c))
45b966db 1366 {
91fcd158
NB
1367 buffer->cur = cur;
1368 skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1369 && cur_token > &list->tokens[first_token]));
041c3194 1370 cur = buffer->cur;
ff2b53ef 1371
041c3194
ZW
1372 flags = PREV_WHITE;
1373 if (cur == buffer->rlimit)
1374 break;
91fcd158 1375 c = *cur;
45b966db 1376 }
91fcd158 1377 cur++;
45b966db 1378
041c3194
ZW
1379 /* Initialize current token. CPP_EOF will not be fixed up by
1380 expand_name_space. */
1381 list->tokens_used = cur_token - list->tokens + 1;
1382 cur_token->type = CPP_EOF;
1383 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1384 cur_token->line = CPP_BUF_LINE (buffer);
1385 cur_token->flags = flags;
1386 flags = 0;
46d07497 1387
041c3194
ZW
1388 switch (c)
1389 {
1390 case '0': case '1': case '2': case '3': case '4':
1391 case '5': case '6': case '7': case '8': case '9':
1392 {
1393 int prev_dot;
46d07497 1394
041c3194
ZW
1395 cur--; /* Backup character. */
1396 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1397 if (prev_dot)
1398 cur_token--;
bfb9dc7f 1399 INIT_TOKEN_STR (list, cur_token);
041c3194
ZW
1400 /* Prepend an immediately previous CPP_DOT token. */
1401 if (prev_dot)
1402 {
1403 if (list->name_cap == list->name_used)
1404 auto_expand_name_space (list);
46d07497 1405
bfb9dc7f 1406 cur_token->val.str.len = 1;
041c3194
ZW
1407 list->namebuf[list->name_used++] = '.';
1408 }
46d07497 1409
041c3194
ZW
1410 continue_number:
1411 cur_token->type = CPP_NUMBER; /* Before parse_number. */
1412 buffer->cur = cur;
bfb9dc7f 1413 parse_number (pfile, list, &cur_token->val.str);
041c3194
ZW
1414 cur = buffer->cur;
1415 }
1416 /* Check for # 123 form of #line. */
1417 if (MIGHT_BE_DIRECTIVE ())
1418 list->directive = _cpp_check_linemarker (pfile, cur_token,
1419 !(cur_token[-1].flags
1420 & PREV_WHITE));
1421 cur_token++;
1422 break;
46d07497 1423
041c3194
ZW
1424 letter:
1425 case '_':
1426 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1427 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1428 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1429 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1430 case 'y': case 'z':
1431 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1432 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1433 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1434 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1435 case 'Y': case 'Z':
1436 cur--; /* Backup character. */
bfb9dc7f 1437 cur_token->val.node = 0;
041c3194
ZW
1438 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
1439
1440 continue_name:
bfb9dc7f 1441 cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
45b966db 1442
041c3194
ZW
1443 if (MIGHT_BE_DIRECTIVE ())
1444 list->directive = _cpp_check_directive (pfile, cur_token,
1445 !(list->tokens[0].flags
1446 & PREV_WHITE));
1447 cur_token++;
1448 break;
f8f769ea 1449
041c3194 1450 case '\'':
041c3194 1451 case '\"':
041c3194
ZW
1452 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1453 /* Do we have a wide string? */
1454 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
f9a0e96c
ZW
1455 && cur_token[-1].val.node == pfile->spec_nodes->n_L)
1456 BACKUP_TOKEN (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
45b966db 1457
041c3194
ZW
1458 do_parse_string:
1459 /* Here c is one of ' " or >. */
bfb9dc7f 1460 INIT_TOKEN_STR (list, cur_token);
041c3194
ZW
1461 buffer->cur = cur;
1462 parse_string (pfile, list, cur_token, c);
1463 cur = buffer->cur;
1464 cur_token++;
1465 break;
45b966db 1466
041c3194
ZW
1467 case '/':
1468 cur_token->type = CPP_DIV;
1469 if (IMMED_TOKEN ())
1470 {
1471 if (PREV_TOKEN_TYPE == CPP_DIV)
1472 {
1473 /* We silently allow C++ comments in system headers,
1474 irrespective of conformance mode, because lots of
1475 broken systems do that and trying to clean it up
1476 in fixincludes is a nightmare. */
1477 if (CPP_IN_SYSTEM_HEADER (pfile))
1478 goto do_line_comment;
1479 else if (CPP_OPTION (pfile, cplusplus_comments))
1480 {
1481 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1482 && ! buffer->warned_cplusplus_comments)
1483 {
1484 buffer->cur = cur;
1485 cpp_pedwarn (pfile,
1486 "C++ style comments are not allowed in ISO C89");
1487 cpp_pedwarn (pfile,
1488 "(this will be reported only once per input file)");
1489 buffer->warned_cplusplus_comments = 1;
1490 }
1491 do_line_comment:
1492 buffer->cur = cur;
1493#if 0 /* Leave until new lexer in place. */
1494 if (cur[-2] != c)
1495 cpp_warning (pfile,
1496 "comment start split across lines");
1497#endif
1498 if (skip_line_comment (pfile))
1499 cpp_warning (pfile, "multi-line comment");
f8f769ea 1500
041c3194
ZW
1501 /* Back-up to first '-' or '/'. */
1502 cur_token--;
1503 if (!CPP_OPTION (pfile, discard_comments)
91fcd158 1504 && (!KNOWN_DIRECTIVE()
041c3194
ZW
1505 || (list->directive->flags & COMMENTS)))
1506 save_comment (list, cur_token++, cur,
1507 buffer->cur - cur, c);
f9a0e96c 1508 else
041c3194
ZW
1509 flags = PREV_WHITE;
1510
1511 cur = buffer->cur;
1512 break;
1513 }
1514 }
1515 }
1516 cur_token++;
1517 break;
1518
1519 case '*':
1520 cur_token->type = CPP_MULT;
1521 if (IMMED_TOKEN ())
45b966db 1522 {
041c3194
ZW
1523 if (PREV_TOKEN_TYPE == CPP_DIV)
1524 {
1525 buffer->cur = cur;
1526#if 0 /* Leave until new lexer in place. */
1527 if (cur[-2] != '/')
1528 cpp_warning (pfile,
1529 "comment start '/*' split across lines");
1530#endif
1531 if (skip_block_comment (pfile))
1532 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1533 "unterminated comment");
1534#if 0 /* Leave until new lexer in place. */
1535 else if (buffer->cur[-2] != '*')
1536 cpp_warning (pfile,
1537 "comment end '*/' split across lines");
1538#endif
1539 /* Back up to opening '/'. */
1540 cur_token--;
1541 if (!CPP_OPTION (pfile, discard_comments)
91fcd158 1542 && (!KNOWN_DIRECTIVE()
041c3194
ZW
1543 || (list->directive->flags & COMMENTS)))
1544 save_comment (list, cur_token++, cur,
1545 buffer->cur - cur, c);
f9a0e96c 1546 else
041c3194
ZW
1547 flags = PREV_WHITE;
1548
1549 cur = buffer->cur;
1550 break;
1551 }
1552 else if (CPP_OPTION (pfile, cplusplus))
1553 {
1554 /* In C++, there are .* and ->* operators. */
1555 if (PREV_TOKEN_TYPE == CPP_DEREF)
1556 BACKUP_TOKEN (CPP_DEREF_STAR);
1557 else if (PREV_TOKEN_TYPE == CPP_DOT)
1558 BACKUP_TOKEN (CPP_DOT_STAR);
1559 }
45b966db 1560 }
041c3194 1561 cur_token++;
f8f769ea 1562 break;
45b966db 1563
041c3194
ZW
1564 case '\n':
1565 case '\r':
1566 handle_newline (cur, buffer->rlimit, c);
1567 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
45b966db 1568 {
041c3194 1569 if (IMMED_TOKEN ())
45b966db 1570 {
041c3194
ZW
1571 /* Remove the escaped newline. Then continue to process
1572 any interrupted name or number. */
1573 cur_token--;
1574 /* Backslash-newline may not be immediately followed by
1575 EOF (C99 5.1.1.2). */
1576 if (cur >= buffer->rlimit)
1577 {
1578 cpp_pedwarn (pfile, "backslash-newline at end of file");
1579 break;
1580 }
1581 if (IMMED_TOKEN ())
1582 {
1583 cur_token--;
1584 if (cur_token->type == CPP_NAME)
1585 goto continue_name;
1586 else if (cur_token->type == CPP_NUMBER)
1587 goto continue_number;
1588 cur_token++;
1589 }
1590 /* Remember whitespace setting. */
1591 flags = cur_token->flags;
f8f769ea 1592 break;
45b966db 1593 }
041c3194
ZW
1594 else
1595 {
1596 buffer->cur = cur;
1597 cpp_warning (pfile,
1598 "backslash and newline separated by space");
1599 }
1600 }
1601 else if (MIGHT_BE_DIRECTIVE ())
1602 {
1603 /* "Null directive." C99 6.10.7: A preprocessing
1604 directive of the form # <new-line> has no effect.
1605
1606 But it is still a directive, and therefore disappears
1607 from the output. */
1608 cur_token--;
f9a0e96c
ZW
1609 if (cur_token->flags & PREV_WHITE
1610 && CPP_WTRADITIONAL (pfile))
1611 cpp_warning (pfile, "K+R C ignores #\\n with the # indented");
f8f769ea 1612 }
45b966db 1613
041c3194
ZW
1614 /* Skip vertical space until we have at least one token to
1615 return. */
1616 if (cur_token != &list->tokens[first_token])
1617 goto out;
1618 list->line = CPP_BUF_LINE (buffer);
f8f769ea 1619 break;
04e3ec78 1620
041c3194
ZW
1621 case '-':
1622 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
f9a0e96c 1623 REVISE_TOKEN (CPP_MINUS_MINUS);
041c3194
ZW
1624 else
1625 PUSH_TOKEN (CPP_MINUS);
1626 break;
45b966db 1627
041c3194
ZW
1628 make_hash:
1629 case '#':
1630 /* The digraph flag checking ensures that ## and %:%:
1631 are interpreted as CPP_PASTE, but #%: and %:# are not. */
1632 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1633 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1634 REVISE_TOKEN (CPP_PASTE);
1635 else
1636 PUSH_TOKEN (CPP_HASH);
1637 break;
04e3ec78 1638
041c3194
ZW
1639 case ':':
1640 cur_token->type = CPP_COLON;
1641 if (IMMED_TOKEN ())
1642 {
1643 if (PREV_TOKEN_TYPE == CPP_COLON
1644 && CPP_OPTION (pfile, cplusplus))
1645 BACKUP_TOKEN (CPP_SCOPE);
9b55f29a 1646 else if (CPP_OPTION (pfile, digraphs))
041c3194 1647 {
9b55f29a
NB
1648 /* Digraph: "<:" is a '[' */
1649 if (PREV_TOKEN_TYPE == CPP_LESS)
1650 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1651 /* Digraph: "%:" is a '#' */
1652 else if (PREV_TOKEN_TYPE == CPP_MOD)
1653 {
1654 (--cur_token)->flags |= DIGRAPH;
1655 goto make_hash;
1656 }
041c3194
ZW
1657 }
1658 }
1659 cur_token++;
1660 break;
f8f769ea 1661
041c3194
ZW
1662 case '&':
1663 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1664 REVISE_TOKEN (CPP_AND_AND);
1665 else
1666 PUSH_TOKEN (CPP_AND);
f8f769ea 1667 break;
45b966db 1668
041c3194
ZW
1669 make_or:
1670 case '|':
1671 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1672 REVISE_TOKEN (CPP_OR_OR);
1673 else
1674 PUSH_TOKEN (CPP_OR);
1675 break;
45b966db 1676
041c3194
ZW
1677 case '+':
1678 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1679 REVISE_TOKEN (CPP_PLUS_PLUS);
1680 else
1681 PUSH_TOKEN (CPP_PLUS);
1682 break;
45b966db 1683
041c3194
ZW
1684 case '=':
1685 /* This relies on equidistance of "?=" and "?" tokens. */
1686 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1687 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1688 else
1689 PUSH_TOKEN (CPP_EQ);
1690 break;
45b966db 1691
041c3194
ZW
1692 case '>':
1693 cur_token->type = CPP_GREATER;
1694 if (IMMED_TOKEN ())
1695 {
1696 if (PREV_TOKEN_TYPE == CPP_GREATER)
1697 BACKUP_TOKEN (CPP_RSHIFT);
1698 else if (PREV_TOKEN_TYPE == CPP_MINUS)
1699 BACKUP_TOKEN (CPP_DEREF);
9b55f29a
NB
1700 else if (CPP_OPTION (pfile, digraphs))
1701 {
1702 /* Digraph: ":>" is a ']' */
1703 if (PREV_TOKEN_TYPE == CPP_COLON)
1704 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1705 /* Digraph: "%>" is a '}' */
1706 else if (PREV_TOKEN_TYPE == CPP_MOD)
1707 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1708 }
041c3194
ZW
1709 }
1710 cur_token++;
1711 break;
1712
1713 case '<':
1714 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1715 {
1716 REVISE_TOKEN (CPP_LSHIFT);
1717 break;
1718 }
1719 /* Is this the beginning of a header name? */
91fcd158 1720 if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
041c3194
ZW
1721 {
1722 c = '>'; /* Terminator. */
1723 cur_token->type = CPP_HEADER_NAME;
1724 goto do_parse_string;
1725 }
1726 PUSH_TOKEN (CPP_LESS);
1727 break;
45b966db 1728
041c3194
ZW
1729 case '%':
1730 /* Digraph: "<%" is a '{' */
1731 cur_token->type = CPP_MOD;
9b55f29a
NB
1732 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1733 && CPP_OPTION (pfile, digraphs))
041c3194
ZW
1734 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1735 cur_token++;
1736 break;
04e3ec78 1737
041c3194
ZW
1738 case '?':
1739 if (cur + 1 < buffer->rlimit && *cur == '?'
1740 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1741 {
1742 /* Handle trigraph. */
1743 cur++;
1744 switch (*cur++)
1745 {
1746 case '(': goto make_open_square;
1747 case ')': goto make_close_square;
1748 case '<': goto make_open_brace;
1749 case '>': goto make_close_brace;
1750 case '=': goto make_hash;
1751 case '!': goto make_or;
1752 case '-': goto make_complement;
1753 case '/': goto make_backslash;
1754 case '\'': goto make_xor;
1755 }
1756 }
1757 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1758 {
1759 /* GNU C++ defines <? and >? operators. */
1760 if (PREV_TOKEN_TYPE == CPP_LESS)
1761 {
1762 REVISE_TOKEN (CPP_MIN);
1763 break;
1764 }
1765 else if (PREV_TOKEN_TYPE == CPP_GREATER)
1766 {
1767 REVISE_TOKEN (CPP_MAX);
1768 break;
1769 }
1770 }
1771 PUSH_TOKEN (CPP_QUERY);
1772 break;
45b966db 1773
041c3194
ZW
1774 case '.':
1775 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1776 && IMMED_TOKEN ()
1777 && !(cur_token[-1].flags & PREV_WHITE))
1778 {
1779 cur_token -= 2;
1780 PUSH_TOKEN (CPP_ELLIPSIS);
1781 }
1782 else
1783 PUSH_TOKEN (CPP_DOT);
1784 break;
c5a04734 1785
041c3194
ZW
1786 make_complement:
1787 case '~': PUSH_TOKEN (CPP_COMPL); break;
1788 make_xor:
1789 case '^': PUSH_TOKEN (CPP_XOR); break;
1790 make_open_brace:
1791 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1792 make_close_brace:
1793 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1794 make_open_square:
1795 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1796 make_close_square:
1797 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1798 make_backslash:
1799 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1800 case '!': PUSH_TOKEN (CPP_NOT); break;
1801 case ',': PUSH_TOKEN (CPP_COMMA); break;
1802 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1803 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1804 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
6d2c2047 1805
041c3194
ZW
1806 case '$':
1807 if (CPP_OPTION (pfile, dollars_in_ident))
1808 goto letter;
1809 /* Fall through */
041c3194
ZW
1810 default:
1811 cur_token->val.aux = c;
1812 PUSH_TOKEN (CPP_OTHER);
1813 break;
1814 }
1815 }
6d2c2047 1816
041c3194
ZW
1817 /* Run out of token space? */
1818 if (cur_token == token_limit)
1819 {
1820 list->tokens_used = cur_token - list->tokens;
1821 _cpp_expand_token_space (list, 256);
1822 goto expanded;
1823 }
6d2c2047 1824
041c3194
ZW
1825 cur_token->flags = flags;
1826 if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1827 {
91fcd158 1828 if (cur > buffer->buf && !is_vspace (cur[-1]))
041c3194
ZW
1829 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1830 CPP_BUF_COLUMN (buffer, cur),
1831 "no newline at end of file");
1832 cur_token++->type = CPP_EOF;
1833 }
6d2c2047 1834
041c3194
ZW
1835 out:
1836 /* All tokens are allocated, so the memory location is fixed. */
1837 first = &list->tokens[first_token];
1838
1839 /* Don't complain about the null directive, nor directives in
1840 assembly source: we don't know where the comments are, and # may
1841 introduce assembler pseudo-ops. Don't complain about invalid
1842 directives in skipped conditional groups (6.10 p4). */
1843 if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1844 && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1845 {
1846 if (first[1].type == CPP_NAME)
1847 cpp_error (pfile, "invalid preprocessing directive #%.*s",
bfb9dc7f 1848 (int) first[1].val.node->length, first[1].val.node->name);
041c3194
ZW
1849 else
1850 cpp_error (pfile, "invalid preprocessing directive");
1851 }
1852
91fcd158
NB
1853 /* Put EOF at end of known directives. This covers "directives do
1854 not extend beyond the end of the line (description 6.10 part 2)". */
1855 if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
041c3194
ZW
1856 {
1857 pfile->first_directive_token = first;
1858 cur_token++->type = CPP_EOF;
1859 }
1860
91fcd158
NB
1861 /* Directives, known or not, always start a new line. */
1862 if (first_token == 0 || list->tokens[first_token].type == CPP_HASH)
041c3194 1863 first->flags |= BOL;
6d2c2047 1864 else
041c3194
ZW
1865 /* 6.10.3.10: Within the sequence of preprocessing tokens making
1866 up the invocation of a function-like macro, new line is
1867 considered a normal white-space character. */
1868 first->flags |= PREV_WHITE;
1869
1870 buffer->cur = cur;
1871 list->tokens_used = cur_token - list->tokens;
1872 pfile->in_lex_line = 0;
6d2c2047
ZW
1873}
1874
041c3194
ZW
1875/* Write the spelling of a token TOKEN, with any appropriate
1876 whitespace before it, to the token_buffer. PREV is the previous
1877 token, which is used to determine if we need to shove in an extra
1878 space in order to avoid accidental token paste. */
1879static void
1880output_token (pfile, token, prev)
1881 cpp_reader *pfile;
1882 const cpp_token *token, *prev;
1883{
1884 int dummy;
c5a04734 1885
041c3194
ZW
1886 if (token->col && (token->flags & BOL))
1887 {
1888 /* Supply enough whitespace to put this token in its original
1889 column. Don't bother trying to reconstruct tabs; we can't
1890 get it right in general, and nothing ought to care. (Yes,
1891 some things do care; the fault lies with them.) */
1892 unsigned char *buffer;
1893 unsigned int spaces = token->col - 1;
1894
1895 CPP_RESERVE (pfile, token->col);
1896 buffer = pfile->limit;
1897
1898 while (spaces--)
1899 *buffer++ = ' ';
1900 pfile->limit = buffer;
1901 }
1902 else if (token->flags & PREV_WHITE)
1903 CPP_PUTC (pfile, ' ');
f9a0e96c 1904 else if (prev)
041c3194 1905 {
f9a0e96c 1906 /* Check for and prevent accidental token pasting. */
041c3194
ZW
1907 if (can_paste (pfile, prev, token, &dummy) != CPP_EOF)
1908 CPP_PUTC (pfile, ' ');
f9a0e96c 1909 /* can_paste doesn't catch all the accidental pastes.
041c3194
ZW
1910 Consider a + ++b - if there is not a space between the + and ++, it
1911 will be misparsed as a++ + b. */
1912 else if ((prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1913 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS))
1914 CPP_PUTC (pfile, ' ');
1915 }
d6d5f795 1916
041c3194
ZW
1917 CPP_RESERVE (pfile, TOKEN_LEN (token));
1918 pfile->limit = spell_token (pfile, token, pfile->limit);
1919}
d6d5f795 1920
041c3194 1921/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1922 already contain the enough space to hold the token's spelling.
1923 Returns a pointer to the character after the last character
1924 written. */
d6d5f795 1925
041c3194
ZW
1926static unsigned char *
1927spell_token (pfile, token, buffer)
1928 cpp_reader *pfile; /* Would be nice to be rid of this... */
1929 const cpp_token *token;
1930 unsigned char *buffer;
1931{
1932 switch (token_spellings[token->type].type)
1933 {
1934 case SPELL_OPERATOR:
1935 {
1936 const unsigned char *spelling;
1937 unsigned char c;
d6d5f795 1938
041c3194
ZW
1939 if (token->flags & DIGRAPH)
1940 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1941 else
1942 spelling = token_spellings[token->type].spelling;
1943
1944 while ((c = *spelling++) != '\0')
1945 *buffer++ = c;
1946 }
1947 break;
d6d5f795 1948
041c3194 1949 case SPELL_IDENT:
bfb9dc7f
ZW
1950 memcpy (buffer, token->val.node->name, token->val.node->length);
1951 buffer += token->val.node->length;
041c3194 1952 break;
d6d5f795 1953
041c3194
ZW
1954 case SPELL_STRING:
1955 {
041c3194
ZW
1956 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1957 *buffer++ = 'L';
bfb9dc7f 1958
041c3194 1959 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
bfb9dc7f
ZW
1960 *buffer++ = '"';
1961 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1962 *buffer++ = '\'';
1963
1964 memcpy (buffer, token->val.str.text, token->val.str.len);
1965 buffer += token->val.str.len;
1966
1967 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1968 *buffer++ = '"';
1969 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1970 *buffer++ = '\'';
041c3194
ZW
1971 }
1972 break;
d6d5f795 1973
041c3194
ZW
1974 case SPELL_CHAR:
1975 *buffer++ = token->val.aux;
1976 break;
d6d5f795 1977
041c3194
ZW
1978 case SPELL_NONE:
1979 cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
1980 break;
1981 }
d6d5f795 1982
041c3194
ZW
1983 return buffer;
1984}
d6d5f795 1985
cf00a885
ZW
1986/* Return the spelling of a token known to be an operator.
1987 Does not distinguish digraphs from their counterparts. */
1988const unsigned char *
1989_cpp_spell_operator (type)
1990 enum cpp_ttype type;
1991{
1992 if (token_spellings[type].type == SPELL_OPERATOR)
1993 return token_spellings[type].spelling;
1994 else
1995 return token_names[type];
1996}
1997
1998
041c3194 1999/* Macro expansion algorithm. TODO. */
d6d5f795 2000
041c3194
ZW
2001
2002/* Free the storage allocated for macro arguments. */
2003static void
2004free_macro_args (args)
2005 macro_args *args;
c5a04734 2006{
041c3194 2007 if (args->tokens)
417f3e3a 2008 free ((PTR) args->tokens);
041c3194
ZW
2009 free (args->ends);
2010 free (args);
c5a04734
ZW
2011}
2012
041c3194
ZW
2013/* Determines if a macro has been already used (and is therefore
2014 disabled). */
c5a04734 2015static int
041c3194 2016is_macro_disabled (pfile, expansion, token)
c5a04734 2017 cpp_reader *pfile;
041c3194
ZW
2018 const cpp_toklist *expansion;
2019 const cpp_token *token;
c5a04734 2020{
041c3194
ZW
2021 cpp_context *context = CURRENT_CONTEXT (pfile);
2022
cf00a885
ZW
2023 /* Don't expand anything if this file has already been preprocessed. */
2024 if (CPP_OPTION (pfile, preprocessed))
2025 return 1;
2026
041c3194
ZW
2027 /* Arguments on either side of ## are inserted in place without
2028 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
2029 occurs during a later rescan pass. The effect is that we expand
2030 iff we would as part of the macro's expansion list, so we should
2031 drop to the macro's context. */
2032 if (IS_ARG_CONTEXT (context))
c5a04734 2033 {
041c3194
ZW
2034 if (token->flags & PASTED)
2035 context--;
2036 else if (!(context->flags & CONTEXT_RAW))
2037 return 1;
2038 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2039 context--;
c5a04734 2040 }
c5a04734 2041
041c3194
ZW
2042 /* Have we already used this macro? */
2043 while (context->level > 0)
c5a04734 2044 {
041c3194
ZW
2045 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2046 return 1;
2047 /* Raw argument tokens are judged based on the token list they
2048 came from. */
2049 if (context->flags & CONTEXT_RAW)
2050 context = pfile->contexts + context->level;
2051 else
2052 context--;
2053 }
c5a04734 2054
041c3194
ZW
2055 /* Function-like macros may be disabled if the '(' is not in the
2056 current context. We check this without disrupting the context
2057 stack. */
2058 if (expansion->paramc >= 0)
2059 {
2060 const cpp_token *next;
2061 unsigned int prev_nme;
c5a04734 2062
041c3194
ZW
2063 context = CURRENT_CONTEXT (pfile);
2064 /* Drop down any contexts we're at the end of: the '(' may
2065 appear in lower macro expansions, or in the rest of the file. */
2066 while (context->posn == context->count && context > pfile->contexts)
2067 {
2068 context--;
2069 /* If we matched, we are disabled, as we appear in the
2070 expansion of each macro we meet. */
2071 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2072 return 1;
2073 }
c5a04734 2074
041c3194
ZW
2075 prev_nme = pfile->no_expand_level;
2076 pfile->no_expand_level = context - pfile->contexts;
417f3e3a 2077 next = _cpp_get_token (pfile);
041c3194
ZW
2078 restore_macro_expansion (pfile, prev_nme);
2079 if (next->type != CPP_OPEN_PAREN)
2080 {
2081 _cpp_push_token (pfile, next);
b9bf5af8 2082 if (CPP_WTRADITIONAL (pfile))
041c3194
ZW
2083 cpp_warning (pfile,
2084 "function macro %.*s must be used with arguments in traditional C",
bfb9dc7f 2085 (int) token->val.node->length, token->val.node->name);
041c3194
ZW
2086 return 1;
2087 }
c5a04734 2088 }
c5a04734 2089
041c3194 2090 return 0;
c5a04734
ZW
2091}
2092
041c3194
ZW
2093/* Add a token to the set of tokens forming the arguments to the macro
2094 being parsed in parse_args. */
2095static void
2096save_token (args, token)
2097 macro_args *args;
2098 const cpp_token *token;
c5a04734 2099{
041c3194
ZW
2100 if (args->used == args->capacity)
2101 {
2102 args->capacity += args->capacity + 100;
2103 args->tokens = (const cpp_token **)
417f3e3a
ZW
2104 xrealloc ((PTR) args->tokens,
2105 args->capacity * sizeof (const cpp_token *));
041c3194
ZW
2106 }
2107 args->tokens[args->used++] = token;
c5a04734
ZW
2108}
2109
041c3194
ZW
2110/* Take and save raw tokens until we finish one argument. Empty
2111 arguments are saved as a single CPP_PLACEMARKER token. */
2112static const cpp_token *
2113parse_arg (pfile, var_args, paren_context, args, pcount)
c5a04734 2114 cpp_reader *pfile;
041c3194
ZW
2115 int var_args;
2116 unsigned int paren_context;
2117 macro_args *args;
2118 unsigned int *pcount;
c5a04734 2119{
041c3194
ZW
2120 const cpp_token *token;
2121 unsigned int paren = 0, count = 0;
2122 int raw, was_raw = 1;
c5a04734 2123
041c3194 2124 for (count = 0;; count++)
c5a04734 2125 {
417f3e3a 2126 token = _cpp_get_token (pfile);
c5a04734 2127
041c3194 2128 switch (token->type)
c5a04734 2129 {
041c3194
ZW
2130 default:
2131 break;
c5a04734 2132
041c3194
ZW
2133 case CPP_OPEN_PAREN:
2134 paren++;
2135 break;
2136
2137 case CPP_CLOSE_PAREN:
2138 if (paren-- != 0)
2139 break;
2140 goto out;
2141
2142 case CPP_COMMA:
2143 /* Commas are not terminators within parantheses or var_args. */
2144 if (paren || var_args)
2145 break;
2146 goto out;
2147
2148 case CPP_EOF: /* Error reported by caller. */
2149 goto out;
c5a04734 2150 }
c5a04734 2151
041c3194
ZW
2152 raw = pfile->cur_context <= paren_context;
2153 if (raw != was_raw)
2154 {
2155 was_raw = raw;
2156 save_token (args, 0);
2157 count++;
c5a04734 2158 }
041c3194 2159 save_token (args, token);
c5a04734 2160 }
c5a04734
ZW
2161
2162 out:
041c3194
ZW
2163 if (count == 0)
2164 {
2165 /* Duplicate the placemarker. Then we can set its flags and
2166 position and safely be using more than one. */
2167 save_token (args, duplicate_token (pfile, &placemarker_token));
2168 count++;
2169 }
2170
2171 *pcount = count;
2172 return token;
c5a04734
ZW
2173}
2174
041c3194
ZW
2175/* This macro returns true if the argument starting at offset O of arglist
2176 A is empty - that is, it's either a single PLACEMARKER token, or a null
2177 pointer followed by a PLACEMARKER. */
2178
2179#define empty_argument(A, O) \
2180 ((A)->tokens[O] ? (A)->tokens[O]->type == CPP_PLACEMARKER \
2181 : (A)->tokens[(O)+1]->type == CPP_PLACEMARKER)
2182
2183/* Parse the arguments making up a macro invocation. Nested arguments
2184 are automatically macro expanded, but immediate macros are not
2185 expanded; this enables e.g. operator # to work correctly. Returns
2186 non-zero on error. */
c5a04734 2187static int
041c3194 2188parse_args (pfile, hp, args)
c5a04734 2189 cpp_reader *pfile;
041c3194
ZW
2190 cpp_hashnode *hp;
2191 macro_args *args;
c5a04734 2192{
041c3194
ZW
2193 const cpp_token *token;
2194 const cpp_toklist *macro;
2195 unsigned int total = 0;
2196 unsigned int paren_context = pfile->cur_context;
2197 int argc = 0;
2198
2199 macro = hp->value.expansion;
2200 do
c5a04734 2201 {
041c3194 2202 unsigned int count;
c5a04734 2203
041c3194
ZW
2204 token = parse_arg (pfile, (argc + 1 == macro->paramc
2205 && (macro->flags & VAR_ARGS)),
2206 paren_context, args, &count);
2207 if (argc < macro->paramc)
c5a04734 2208 {
041c3194
ZW
2209 total += count;
2210 args->ends[argc] = total;
c5a04734 2211 }
041c3194 2212 argc++;
c5a04734 2213 }
041c3194 2214 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
c5a04734 2215
041c3194
ZW
2216 if (token->type == CPP_EOF)
2217 {
2218 cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
2219 hp->length, hp->name);
2220 return 1;
2221 }
2222 else if (argc < macro->paramc)
2223 {
2224 /* A rest argument is allowed to not appear in the invocation at all.
2225 e.g. #define debug(format, args...) ...
2226 debug("string");
2227 This is exactly the same as if the rest argument had received no
563dd08a 2228 tokens - debug("string",); This extension is deprecated. */
041c3194 2229
563dd08a 2230 if (argc + 1 == macro->paramc && (macro->flags & GNU_REST_ARGS))
041c3194
ZW
2231 {
2232 /* Duplicate the placemarker. Then we can set its flags and
2233 position and safely be using more than one. */
2234 save_token (args, duplicate_token (pfile, &placemarker_token));
2235 args->ends[argc] = total + 1;
2236 return 0;
2237 }
2238 else
2239 {
2240 cpp_error (pfile,
2241 "insufficient arguments in invocation of macro \"%.*s\"",
2242 hp->length, hp->name);
2243 return 1;
2244 }
2245 }
2246 /* An empty argument to an empty function-like macro is fine. */
2247 else if (argc > macro->paramc
2248 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2249 {
2250 cpp_error (pfile,
2251 "too many arguments in invocation of macro \"%.*s\"",
2252 hp->length, hp->name);
2253 return 1;
2254 }
c5a04734 2255
041c3194
ZW
2256 return 0;
2257}
2258
2259/* Adds backslashes before all backslashes and double quotes appearing
2260 in strings. Non-printable characters are converted to octal. */
2261static U_CHAR *
2262quote_string (dest, src, len)
2263 U_CHAR *dest;
2264 const U_CHAR *src;
2265 unsigned int len;
2266{
2267 while (len--)
c5a04734 2268 {
041c3194 2269 U_CHAR c = *src++;
c5a04734 2270
041c3194 2271 if (c == '\\' || c == '"')
6ab3e7dd 2272 {
041c3194
ZW
2273 *dest++ = '\\';
2274 *dest++ = c;
2275 }
2276 else
2277 {
2278 if (ISPRINT (c))
2279 *dest++ = c;
2280 else
2281 {
2282 sprintf ((char *) dest, "\\%03o", c);
2283 dest += 4;
2284 }
6ab3e7dd 2285 }
c5a04734 2286 }
c5a04734 2287
041c3194 2288 return dest;
c5a04734
ZW
2289}
2290
041c3194
ZW
2291/* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2292 CPP_STRING token containing TEXT in quoted form. */
2293static cpp_token *
2294make_string_token (token, text, len)
2295 cpp_token *token;
2296 const U_CHAR *text;
2297 unsigned int len;
2298{
2299 U_CHAR *buf;
2300
2301 buf = (U_CHAR *) xmalloc (len * 4);
2302 token->type = CPP_STRING;
2303 token->flags = 0;
bfb9dc7f
ZW
2304 token->val.str.text = buf;
2305 token->val.str.len = quote_string (buf, text, len) - buf;
041c3194
ZW
2306 return token;
2307}
2308
2309/* Allocates and converts a temporary token to a CPP_NUMBER token,
2310 evaluating to NUMBER. */
2311static cpp_token *
2312alloc_number_token (pfile, number)
c5a04734 2313 cpp_reader *pfile;
041c3194 2314 int number;
c5a04734 2315{
041c3194
ZW
2316 cpp_token *result;
2317 char *buf;
2318
2319 result = get_temp_token (pfile);
2320 buf = xmalloc (20);
2321 sprintf (buf, "%d", number);
2322
2323 result->type = CPP_NUMBER;
2324 result->flags = 0;
bfb9dc7f
ZW
2325 result->val.str.text = (U_CHAR *) buf;
2326 result->val.str.len = strlen (buf);
041c3194
ZW
2327 return result;
2328}
c5a04734 2329
041c3194
ZW
2330/* Returns a temporary token from the temporary token store of PFILE. */
2331static cpp_token *
2332get_temp_token (pfile)
2333 cpp_reader *pfile;
2334{
2335 if (pfile->temp_used == pfile->temp_alloced)
2336 {
2337 if (pfile->temp_used == pfile->temp_cap)
2338 {
2339 pfile->temp_cap += pfile->temp_cap + 20;
2340 pfile->temp_tokens = (cpp_token **) xrealloc
2341 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2342 }
2343 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2344 (sizeof (cpp_token));
2345 }
c5a04734 2346
041c3194
ZW
2347 return pfile->temp_tokens[pfile->temp_used++];
2348}
2349
2350/* Release (not free) for re-use the temporary tokens of PFILE. */
2351static void
2352release_temp_tokens (pfile)
2353 cpp_reader *pfile;
2354{
2355 while (pfile->temp_used)
c5a04734 2356 {
041c3194 2357 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
c5a04734 2358
bfb9dc7f 2359 if (token_spellings[token->type].type == SPELL_STRING)
c5a04734 2360 {
bfb9dc7f
ZW
2361 free ((char *) token->val.str.text);
2362 token->val.str.text = 0;
c5a04734
ZW
2363 }
2364 }
041c3194 2365}
c5a04734 2366
041c3194
ZW
2367/* Free all of PFILE's dynamically-allocated temporary tokens. */
2368void
2369_cpp_free_temp_tokens (pfile)
2370 cpp_reader *pfile;
2371{
2372 if (pfile->temp_tokens)
c5a04734 2373 {
041c3194
ZW
2374 /* It is possible, though unlikely (looking for '(' of a funlike
2375 macro into EOF), that we haven't released the tokens yet. */
2376 release_temp_tokens (pfile);
2377 while (pfile->temp_alloced)
2378 free (pfile->temp_tokens[--pfile->temp_alloced]);
2379 free (pfile->temp_tokens);
c5a04734
ZW
2380 }
2381
041c3194
ZW
2382 if (pfile->date)
2383 {
bfb9dc7f 2384 free ((char *) pfile->date->val.str.text);
041c3194 2385 free (pfile->date);
bfb9dc7f 2386 free ((char *) pfile->time->val.str.text);
041c3194
ZW
2387 free (pfile->time);
2388 }
c5a04734
ZW
2389}
2390
041c3194
ZW
2391/* Copy TOKEN into a temporary token from PFILE's store. */
2392static cpp_token *
2393duplicate_token (pfile, token)
2394 cpp_reader *pfile;
2395 const cpp_token *token;
2396{
2397 cpp_token *result = get_temp_token (pfile);
c5a04734 2398
041c3194 2399 *result = *token;
bfb9dc7f 2400 if (token_spellings[token->type].type == SPELL_STRING)
041c3194 2401 {
bfb9dc7f
ZW
2402 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2403 memcpy (buff, token->val.str.text, token->val.str.len);
2404 result->val.str.text = buff;
041c3194
ZW
2405 }
2406 return result;
2407}
c5a04734 2408
041c3194
ZW
2409/* Determine whether two tokens can be pasted together, and if so,
2410 what the resulting token is. Returns CPP_EOF if the tokens cannot
2411 be pasted, or the appropriate type for the merged token if they
2412 can. */
2413static enum cpp_ttype
2414can_paste (pfile, token1, token2, digraph)
2415 cpp_reader * pfile;
2416 const cpp_token *token1, *token2;
2417 int* digraph;
c5a04734 2418{
041c3194
ZW
2419 enum cpp_ttype a = token1->type, b = token2->type;
2420 int cxx = CPP_OPTION (pfile, cplusplus);
c5a04734 2421
041c3194
ZW
2422 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2423 return a + (CPP_EQ_EQ - CPP_EQ);
c5a04734 2424
041c3194 2425 switch (a)
c5a04734 2426 {
041c3194
ZW
2427 case CPP_GREATER:
2428 if (b == a) return CPP_RSHIFT;
2429 if (b == CPP_QUERY && cxx) return CPP_MAX;
2430 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
2431 break;
2432 case CPP_LESS:
2433 if (b == a) return CPP_LSHIFT;
2434 if (b == CPP_QUERY && cxx) return CPP_MIN;
2435 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
9b55f29a
NB
2436 if (CPP_OPTION (pfile, digraphs))
2437 {
2438 if (b == CPP_COLON)
2439 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2440 if (b == CPP_MOD)
2441 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
2442 }
041c3194 2443 break;
c5a04734 2444
041c3194
ZW
2445 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2446 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2447 case CPP_OR: if (b == a) return CPP_OR_OR; break;
c5a04734 2448
041c3194
ZW
2449 case CPP_MINUS:
2450 if (b == a) return CPP_MINUS_MINUS;
2451 if (b == CPP_GREATER) return CPP_DEREF;
2452 break;
2453 case CPP_COLON:
2454 if (b == a && cxx) return CPP_SCOPE;
9b55f29a 2455 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
041c3194
ZW
2456 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2457 break;
2458
2459 case CPP_MOD:
9b55f29a
NB
2460 if (CPP_OPTION (pfile, digraphs))
2461 {
2462 if (b == CPP_GREATER)
2463 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2464 if (b == CPP_COLON)
2465 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2466 }
041c3194
ZW
2467 break;
2468 case CPP_DEREF:
2469 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2470 break;
2471 case CPP_DOT:
2472 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2473 if (b == CPP_NUMBER) return CPP_NUMBER;
2474 break;
2475
2476 case CPP_HASH:
2477 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2478 /* %:%: digraph */
2479 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2480 break;
2481
2482 case CPP_NAME:
2483 if (b == CPP_NAME) return CPP_NAME;
2484 if (b == CPP_NUMBER
bfb9dc7f 2485 && is_numstart(token2->val.str.text[0])) return CPP_NAME;
041c3194 2486 if (b == CPP_CHAR
bfb9dc7f 2487 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
041c3194 2488 if (b == CPP_STRING
bfb9dc7f 2489 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
041c3194
ZW
2490 break;
2491
2492 case CPP_NUMBER:
2493 if (b == CPP_NUMBER) return CPP_NUMBER;
2494 if (b == CPP_NAME) return CPP_NUMBER;
2495 if (b == CPP_DOT) return CPP_NUMBER;
2496 /* Numbers cannot have length zero, so this is safe. */
2497 if ((b == CPP_PLUS || b == CPP_MINUS)
bfb9dc7f 2498 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
041c3194
ZW
2499 return CPP_NUMBER;
2500 break;
2501
2502 default:
2503 break;
c5a04734
ZW
2504 }
2505
041c3194
ZW
2506 return CPP_EOF;
2507}
2508
2509/* Check if TOKEN is to be ##-pasted with the token after it. */
2510static const cpp_token *
2511maybe_paste_with_next (pfile, token)
2512 cpp_reader *pfile;
2513 const cpp_token *token;
2514{
2515 cpp_token *pasted;
2516 const cpp_token *second;
2517 cpp_context *context = CURRENT_CONTEXT (pfile);
2518
2519 /* Is this token on the LHS of ## ? */
041c3194 2520
417f3e3a
ZW
2521 while ((token->flags & PASTE_LEFT)
2522 || ((context->flags & CONTEXT_PASTEL)
2523 && context->posn == context->count))
c5a04734 2524 {
417f3e3a
ZW
2525 /* Suppress macro expansion for next token, but don't conflict
2526 with the other method of suppression. If it is an argument,
2527 macro expansion within the argument will still occur. */
2528 pfile->paste_level = pfile->cur_context;
2529 second = _cpp_get_token (pfile);
2530 pfile->paste_level = 0;
2531
2532 /* Ignore placemarker argument tokens (cannot be from an empty
2533 macro since macros are not expanded). */
2534 if (token->type == CPP_PLACEMARKER)
2535 pasted = duplicate_token (pfile, second);
2536 else if (second->type == CPP_PLACEMARKER)
041c3194 2537 {
417f3e3a
ZW
2538 cpp_context *mac_context = CURRENT_CONTEXT (pfile) - 1;
2539 /* GCC has special extended semantics for a ## b where b is
2540 a varargs parameter: a disappears if b consists of no
2541 tokens. This extension is deprecated. */
2542 if ((mac_context->u.list->flags & GNU_REST_ARGS)
2543 && (mac_context->u.list->tokens[mac_context->posn-1].val.aux + 1
2544 == (unsigned) mac_context->u.list->paramc))
2545 {
2546 cpp_warning (pfile, "deprecated GNU ## extension used");
2547 pasted = duplicate_token (pfile, second);
2548 }
2549 else
2550 pasted = duplicate_token (pfile, token);
041c3194
ZW
2551 }
2552 else
041c3194 2553 {
417f3e3a
ZW
2554 int digraph = 0;
2555 enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
c5a04734 2556
417f3e3a
ZW
2557 if (type == CPP_EOF)
2558 {
2559 if (CPP_OPTION (pfile, warn_paste))
2560 cpp_warning (pfile,
2561 "pasting would not give a valid preprocessing token");
2562 _cpp_push_token (pfile, second);
2563 return token;
2564 }
c5a04734 2565
417f3e3a
ZW
2566 if (type == CPP_NAME || type == CPP_NUMBER)
2567 {
2568 /* Join spellings. */
2569 U_CHAR *buf, *end;
2570
2571 pasted = get_temp_token (pfile);
2572 buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2573 end = spell_token (pfile, token, buf);
2574 end = spell_token (pfile, second, end);
2575 *end = '\0';
041c3194 2576
417f3e3a
ZW
2577 if (type == CPP_NAME)
2578 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2579 else
2580 {
2581 pasted->val.str.text = uxstrdup (buf);
2582 pasted->val.str.len = end - buf;
2583 }
2584 }
2585 else if (type == CPP_WCHAR || type == CPP_WSTRING)
2586 pasted = duplicate_token (pfile, second);
bfb9dc7f
ZW
2587 else
2588 {
417f3e3a
ZW
2589 pasted = get_temp_token (pfile);
2590 pasted->val.integer = 0;
bfb9dc7f 2591 }
417f3e3a
ZW
2592
2593 pasted->type = type;
2594 pasted->flags = digraph ? DIGRAPH : 0;
041c3194
ZW
2595 }
2596
417f3e3a
ZW
2597 /* The pasted token gets the whitespace flags and position of the
2598 first token, the PASTE_LEFT flag of the second token, plus the
2599 PASTED flag to indicate it is the result of a paste. However, we
2600 want to preserve the DIGRAPH flag. */
2601 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2602 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2603 | (second->flags & PASTE_LEFT) | PASTED);
2604 pasted->col = token->col;
2605 pasted->line = token->line;
2606
2607 /* See if there is another token to be pasted onto the one we just
2608 constructed. */
2609 token = pasted;
2610 context = CURRENT_CONTEXT (pfile);
2611 /* and loop */
041c3194 2612 }
417f3e3a 2613 return token;
041c3194
ZW
2614}
2615
2616/* Convert a token sequence to a single string token according to the
2617 rules of the ISO C #-operator. */
2618#define INIT_SIZE 200
2619static cpp_token *
2620stringify_arg (pfile, token)
c5a04734 2621 cpp_reader *pfile;
041c3194 2622 const cpp_token *token;
c5a04734 2623{
041c3194
ZW
2624 cpp_token *result;
2625 unsigned char *main_buf;
2626 unsigned int prev_value, backslash_count = 0;
2627 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
c5a04734 2628
417f3e3a 2629 push_arg_context (pfile, token);
041c3194
ZW
2630 prev_value = prevent_macro_expansion (pfile);
2631 main_buf = (unsigned char *) xmalloc (buf_cap);
c5a04734 2632
041c3194
ZW
2633 result = get_temp_token (pfile);
2634 ASSIGN_FLAGS_AND_POS (result, token);
2635
417f3e3a 2636 for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
c5a04734 2637 {
041c3194
ZW
2638 int escape;
2639 unsigned char *buf;
2640 unsigned int len = TOKEN_LEN (token);
c5a04734 2641
041c3194
ZW
2642 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2643 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2644 if (escape)
2645 len *= 4 + 1;
2646
2647 if (buf_used + len > buf_cap)
c5a04734 2648 {
041c3194
ZW
2649 buf_cap = buf_used + len + INIT_SIZE;
2650 main_buf = xrealloc (main_buf, buf_cap);
2651 }
c5a04734 2652
041c3194
ZW
2653 if (whitespace && (token->flags & PREV_WHITE))
2654 main_buf[buf_used++] = ' ';
c5a04734 2655
041c3194
ZW
2656 if (escape)
2657 buf = (unsigned char *) xmalloc (len);
2658 else
2659 buf = main_buf + buf_used;
2660
2661 len = spell_token (pfile, token, buf) - buf;
2662 if (escape)
2663 {
2664 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2665 free (buf);
2666 }
2667 else
2668 buf_used += len;
c5a04734 2669
041c3194
ZW
2670 whitespace = 1;
2671 if (token->type == CPP_BACKSLASH)
2672 backslash_count++;
2673 else
2674 backslash_count = 0;
2675 }
c5a04734 2676
041c3194
ZW
2677 /* Ignore the final \ of invalid string literals. */
2678 if (backslash_count & 1)
2679 {
2680 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2681 buf_used--;
2682 }
c5a04734 2683
041c3194 2684 result->type = CPP_STRING;
bfb9dc7f
ZW
2685 result->val.str.text = main_buf;
2686 result->val.str.len = buf_used;
041c3194
ZW
2687 restore_macro_expansion (pfile, prev_value);
2688 return result;
2689}
c5a04734 2690
041c3194
ZW
2691/* Allocate more room on the context stack of PFILE. */
2692static void
2693expand_context_stack (pfile)
2694 cpp_reader *pfile;
2695{
2696 pfile->context_cap += pfile->context_cap + 20;
2697 pfile->contexts = (cpp_context *)
2698 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2699}
c5a04734 2700
041c3194
ZW
2701/* Push the context of macro NODE onto the context stack. TOKEN is
2702 the CPP_NAME token invoking the macro. */
417f3e3a
ZW
2703static int
2704push_macro_context (pfile, token)
041c3194 2705 cpp_reader *pfile;
041c3194
ZW
2706 const cpp_token *token;
2707{
2708 unsigned char orig_flags;
2709 macro_args *args;
2710 cpp_context *context;
417f3e3a 2711 cpp_hashnode *node = token->val.node;
c5a04734 2712
041c3194
ZW
2713 /* Token's flags may change when parsing args containing a nested
2714 invocation of this macro. */
2715 orig_flags = token->flags & (PREV_WHITE | BOL);
2716 args = 0;
2717 if (node->value.expansion->paramc >= 0)
c5a04734 2718 {
041c3194
ZW
2719 unsigned int error, prev_nme;
2720
2721 /* Allocate room for the argument contexts, and parse them. */
2722 args = (macro_args *) xmalloc (sizeof (macro_args));
2723 args->ends = (unsigned int *)
2724 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2725 args->tokens = 0;
2726 args->capacity = 0;
2727 args->used = 0;
2728 args->level = pfile->cur_context;
2729
2730 prev_nme = prevent_macro_expansion (pfile);
2731 pfile->args = args;
2732 error = parse_args (pfile, node, args);
2733 pfile->args = 0;
2734 restore_macro_expansion (pfile, prev_nme);
2735 if (error)
2736 {
2737 free_macro_args (args);
417f3e3a 2738 return 1;
041c3194 2739 }
c5a04734 2740 }
c5a04734 2741
041c3194
ZW
2742 /* Now push its context. */
2743 pfile->cur_context++;
2744 if (pfile->cur_context == pfile->context_cap)
2745 expand_context_stack (pfile);
2746
2747 context = CURRENT_CONTEXT (pfile);
2748 context->u.list = node->value.expansion;
2749 context->args = args;
2750 context->posn = 0;
2751 context->count = context->u.list->tokens_used;
2752 context->level = pfile->cur_context;
2753 context->flags = 0;
2754 context->pushed_token = 0;
2755
2756 /* Set the flags of the first token. We know there must
2757 be one, empty macros are a single placemarker token. */
2758 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2759
417f3e3a 2760 return 0;
c5a04734
ZW
2761}
2762
041c3194
ZW
2763/* Push an argument to the current macro onto the context stack.
2764 TOKEN is the MACRO_ARG token representing the argument expansion. */
417f3e3a 2765static void
041c3194
ZW
2766push_arg_context (pfile, token)
2767 cpp_reader *pfile;
2768 const cpp_token *token;
c5a04734 2769{
041c3194
ZW
2770 cpp_context *context;
2771 macro_args *args;
2772
2773 pfile->cur_context++;
2774 if (pfile->cur_context == pfile->context_cap)
2775 expand_context_stack (pfile);
2776
2777 context = CURRENT_CONTEXT (pfile);
2778 args = context[-1].args;
2779
2780 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2781 context->u.arg = args->tokens + context->count;
2782 context->count = args->ends[token->val.aux] - context->count;
2783 context->args = 0;
2784 context->posn = 0;
2785 context->level = args->level;
2786 context->flags = CONTEXT_ARG | CONTEXT_RAW;
2787 context->pushed_token = 0;
2788
2789 /* Set the flags of the first token. There is one. */
2790 {
2791 const cpp_token *first = context->u.arg[0];
2792 if (!first)
2793 first = context->u.arg[1];
c5a04734 2794
041c3194
ZW
2795 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2796 token->flags & (PREV_WHITE | BOL));
2797 }
c5a04734 2798
041c3194
ZW
2799 if (token->flags & PASTE_LEFT)
2800 context->flags |= CONTEXT_PASTEL;
2801 if (pfile->paste_level)
2802 context->flags |= CONTEXT_PASTER;
c5a04734
ZW
2803}
2804
041c3194
ZW
2805/* "Unget" a token. It is effectively inserted in the token queue and
2806 will be returned by the next call to get_raw_token. */
c5a04734 2807void
041c3194 2808_cpp_push_token (pfile, token)
c5a04734 2809 cpp_reader *pfile;
041c3194 2810 const cpp_token *token;
c5a04734 2811{
041c3194
ZW
2812 cpp_context *context = CURRENT_CONTEXT (pfile);
2813 if (context->pushed_token)
2814 cpp_ice (pfile, "two tokens pushed in a row");
2815 if (token->type != CPP_EOF)
2816 context->pushed_token = token;
2817 /* Don't push back a directive's CPP_EOF, step back instead. */
2818 else if (pfile->cur_context == 0)
2819 pfile->contexts[0].posn--;
2820}
c5a04734 2821
041c3194
ZW
2822/* Handle a preprocessing directive. TOKEN is the CPP_HASH token
2823 introducing the directive. */
2824static void
2825process_directive (pfile, token)
2826 cpp_reader *pfile;
2827 const cpp_token *token;
2828{
2829 const struct directive *d = pfile->token_list.directive;
2830 int prev_nme = 0;
2831
2832 /* Skip over the directive name. */
2833 if (token[1].type == CPP_NAME)
2834 _cpp_get_raw_token (pfile);
2835 else if (token[1].type != CPP_NUMBER)
2836 cpp_ice (pfile, "directive begins with %s?!",
2837 token_names[token[1].type]);
2838
2839 /* Flush pending tokens at this point, in case the directive produces
2840 output. XXX Directive output won't be visible to a direct caller of
2841 cpp_get_token. */
2842 if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
2843 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
2844
2845 if (! (d->flags & EXPAND))
2846 prev_nme = prevent_macro_expansion (pfile);
2847 (void) (*d->handler) (pfile);
2848 if (! (d->flags & EXPAND))
2849 restore_macro_expansion (pfile, prev_nme);
2850 _cpp_skip_rest_of_line (pfile);
2851}
c5a04734 2852
041c3194
ZW
2853/* The external interface to return the next token. All macro
2854 expansion and directive processing is handled internally, the
2855 caller only ever sees the output after preprocessing. */
2856const cpp_token *
2857cpp_get_token (pfile)
2858 cpp_reader *pfile;
2859{
2860 const cpp_token *token;
417f3e3a 2861 /* Loop till we hit a non-directive, non-placemarker token. */
041c3194
ZW
2862 for (;;)
2863 {
417f3e3a
ZW
2864 token = _cpp_get_token (pfile);
2865
2866 if (token->type == CPP_PLACEMARKER)
2867 continue;
2868
2869 if (token->type == CPP_HASH && token->flags & BOL
041c3194 2870 && pfile->token_list.directive)
c5a04734 2871 {
041c3194
ZW
2872 process_directive (pfile, token);
2873 continue;
c5a04734
ZW
2874 }
2875
417f3e3a
ZW
2876 return token;
2877 }
2878}
2879
2880/* The internal interface to return the next token. There are two
2881 differences between the internal and external interfaces: the
2882 internal interface may return a PLACEMARKER token, and it does not
2883 process directives. */
2884const cpp_token *
2885_cpp_get_token (pfile)
2886 cpp_reader *pfile;
2887{
2888 const cpp_token *token;
2889 cpp_hashnode *node;
2890
2891 /* Loop until we hit a non-macro token. */
2892 for (;;)
2893 {
2894 token = get_raw_token (pfile);
2895
041c3194
ZW
2896 /* Short circuit EOF. */
2897 if (token->type == CPP_EOF)
2898 return token;
417f3e3a
ZW
2899
2900 /* If we are skipping... */
2901 if (pfile->skipping)
c5a04734 2902 {
417f3e3a
ZW
2903 /* we still have to process directives, */
2904 if (pfile->token_list.directive)
2905 return token;
2906
2907 /* but everything else is ignored. */
041c3194
ZW
2908 _cpp_skip_rest_of_line (pfile);
2909 continue;
2910 }
c5a04734 2911
417f3e3a
ZW
2912 /* If there's a potential control macro and we get here, then that
2913 #ifndef didn't cover the entire file and its argument shouldn't
2914 be taken as a control macro. */
2915 pfile->potential_control_macro = 0;
c5a04734 2916
417f3e3a
ZW
2917 /* See if there's a token to paste with this one. */
2918 if (!pfile->paste_level)
2919 token = maybe_paste_with_next (pfile, token);
c5a04734 2920
417f3e3a
ZW
2921 /* If it isn't a macro, return it now. */
2922 if (token->type != CPP_NAME
2923 || token->val.node->type == T_VOID)
2924 return token;
c5a04734 2925
417f3e3a
ZW
2926 /* Is macro expansion disabled in general? */
2927 if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
2928 return token;
041c3194 2929
417f3e3a
ZW
2930 node = token->val.node;
2931 if (node->type != T_MACRO)
2932 return special_symbol (pfile, node, token);
c5a04734 2933
041c3194
ZW
2934 if (is_macro_disabled (pfile, node->value.expansion, token))
2935 return token;
c5a04734 2936
417f3e3a
ZW
2937 if (pfile->cur_context > CPP_STACK_MAX)
2938 {
2939 cpp_error (pfile, "macros nested too deep invoking '%s'", node->name);
2940 return token;
2941 }
2942
2943 if (push_macro_context (pfile, token))
2944 return token;
2945 /* else loop */
041c3194 2946 }
041c3194 2947}
c5a04734 2948
041c3194
ZW
2949/* Returns the next raw token, i.e. without performing macro
2950 expansion. Argument contexts are automatically entered. */
2951static const cpp_token *
2952get_raw_token (pfile)
2953 cpp_reader *pfile;
2954{
2955 const cpp_token *result;
417f3e3a 2956 cpp_context *context;
c5a04734 2957
417f3e3a 2958 for (;;)
041c3194 2959 {
417f3e3a
ZW
2960 context = CURRENT_CONTEXT (pfile);
2961 if (context->pushed_token)
041c3194 2962 {
417f3e3a
ZW
2963 result = context->pushed_token;
2964 context->pushed_token = 0;
2965 }
2966 else if (context->posn == context->count)
2967 {
2968 if (pop_context (pfile))
2969 return &eof_token;
2970 continue;
2971 }
2972 else
2973 {
2974 if (IS_ARG_CONTEXT (context))
c5a04734 2975 {
041c3194 2976 result = context->u.arg[context->posn++];
417f3e3a
ZW
2977 if (result == 0)
2978 {
2979 context->flags ^= CONTEXT_RAW;
2980 result = context->u.arg[context->posn++];
2981 }
2982 return result; /* Cannot be a CPP_MACRO_ARG */
c5a04734 2983 }
417f3e3a 2984 result = &context->u.list->tokens[context->posn++];
041c3194 2985 }
c5a04734 2986
417f3e3a
ZW
2987 if (result->type != CPP_MACRO_ARG)
2988 return result;
2989
2990 if (result->flags & STRINGIFY_ARG)
2991 return stringify_arg (pfile, result);
2992
2993 push_arg_context (pfile, result);
2994 }
041c3194 2995}
c5a04734 2996
041c3194
ZW
2997/* Internal interface to get the token without macro expanding. */
2998const cpp_token *
2999_cpp_get_raw_token (pfile)
3000 cpp_reader *pfile;
3001{
3002 int prev_nme = prevent_macro_expansion (pfile);
417f3e3a 3003 const cpp_token *result = _cpp_get_token (pfile);
041c3194
ZW
3004 restore_macro_expansion (pfile, prev_nme);
3005 return result;
3006}
c5a04734 3007
041c3194
ZW
3008/* A thin wrapper to lex_line. CLEAR is non-zero if the current token
3009 list should be overwritten, or zero if we need to append
3010 (typically, if we are within the arguments to a macro, or looking
3011 for the '(' to start a function-like macro invocation). */
3012static int
3013lex_next (pfile, clear)
3014 cpp_reader *pfile;
3015 int clear;
3016{
3017 cpp_toklist *list = &pfile->token_list;
3018 const cpp_token *old_list = list->tokens;
3019 unsigned int old_used = list->tokens_used;
c5a04734 3020
041c3194 3021 if (clear)
c5a04734 3022 {
041c3194
ZW
3023 /* Release all temporary tokens. */
3024 _cpp_clear_toklist (list);
3025 pfile->contexts[0].posn = 0;
3026 if (pfile->temp_used)
3027 release_temp_tokens (pfile);
c5a04734 3028 }
041c3194
ZW
3029
3030 lex_line (pfile, list);
3031 pfile->contexts[0].count = list->tokens_used;
c5a04734 3032
041c3194 3033 if (!clear && pfile->args)
c5a04734 3034 {
041c3194
ZW
3035 /* Fix up argument token pointers. */
3036 if (old_list != list->tokens)
3037 {
3038 unsigned int i;
3039
3040 for (i = 0; i < pfile->args->used; i++)
3041 {
3042 const cpp_token *token = pfile->args->tokens[i];
3043 if (token >= old_list && token < old_list + old_used)
3044 pfile->args->tokens[i] = (const cpp_token *)
3045 ((char *) token + ((char *) list->tokens - (char *) old_list));
3046 }
3047 }
3048
3049 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3050 tokens within the list of arguments that would otherwise act as
3051 preprocessing directives, the behavior is undefined.
3052
3053 This implementation will report a hard error and treat the
3054 'sequence of preprocessing tokens' as part of the macro argument,
3055 not a directive.
3056
3057 Note if pfile->args == 0, we're OK since we're only inside a
3058 macro argument after a '('. */
3059 if (list->directive)
3060 {
3061 cpp_error_with_line (pfile, list->tokens[old_used].line,
3062 list->tokens[old_used].col,
3063 "#%s may not be used inside a macro argument",
3064 list->directive->name);
91fcd158 3065 return 1;
041c3194 3066 }
c5a04734
ZW
3067 }
3068
041c3194 3069 return 0;
c5a04734
ZW
3070}
3071
417f3e3a
ZW
3072/* Pops a context off the context stack. If we're at the bottom, lexes
3073 the next logical line. Returns EOF if we're at the end of the
041c3194
ZW
3074 argument list to the # operator, or if it is illegal to "overflow"
3075 into the rest of the file (e.g. 6.10.3.1.1). */
3076static int
417f3e3a 3077pop_context (pfile)
041c3194
ZW
3078 cpp_reader *pfile;
3079{
3080 cpp_context *context;
3fef5b2b 3081
041c3194 3082 if (pfile->cur_context == 0)
417f3e3a
ZW
3083 {
3084 /* If we are currently processing a directive, do not advance. 6.10
3085 paragraph 2: A new-line character ends the directive even if it
3086 occurs within what would otherwise be an invocation of a
3087 function-like macro. */
3088 if (pfile->token_list.directive)
3089 return 1;
3090
3091 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3092 }
041c3194
ZW
3093
3094 /* Argument contexts, when parsing args or handling # operator
3095 return CPP_EOF at the end. */
3096 context = CURRENT_CONTEXT (pfile);
3097 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3098 return 1;
3099
3100 /* Free resources when leaving macro contexts. */
3101 if (context->args)
3102 free_macro_args (context->args);
3103
3104 if (pfile->cur_context == pfile->no_expand_level)
3105 pfile->no_expand_level--;
3106 pfile->cur_context--;
3107
3108 return 0;
3109}
3110
041c3194
ZW
3111/* Turn off macro expansion at the current context level. */
3112static unsigned int
3113prevent_macro_expansion (pfile)
3114 cpp_reader *pfile;
3115{
3116 unsigned int prev_value = pfile->no_expand_level;
3117 pfile->no_expand_level = pfile->cur_context;
3118 return prev_value;
3119}
3120
3121/* Restore macro expansion to its previous state. */
3122static void
3123restore_macro_expansion (pfile, prev_value)
3124 cpp_reader *pfile;
3125 unsigned int prev_value;
3126{
3127 pfile->no_expand_level = prev_value;
3128}
3129
3130/* Used by cpperror.c to obtain the correct line and column to report
3131 in a diagnostic. */
3132unsigned int
3133_cpp_get_line (pfile, pcol)
3134 cpp_reader *pfile;
3135 unsigned int *pcol;
3136{
3137 unsigned int index;
3138 const cpp_token *cur_token;
3139
3140 if (pfile->in_lex_line)
3141 index = pfile->token_list.tokens_used;
3142 else
3143 index = pfile->contexts[0].posn;
3144
3145 cur_token = &pfile->token_list.tokens[index - 1];
3146 if (pcol)
3147 *pcol = cur_token->col;
3148 return cur_token->line;
3149}
3150
3151#define DSC(str) (const U_CHAR *)str, sizeof str - 1
3152static const char * const monthnames[] =
3153{
3154 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3155 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3156};
3157
3158/* Handle builtin macros like __FILE__. */
3159static const cpp_token *
3160special_symbol (pfile, node, token)
3161 cpp_reader *pfile;
3162 cpp_hashnode *node;
d1d9a6bd 3163 const cpp_token *token;
3fef5b2b 3164{
041c3194
ZW
3165 cpp_token *result;
3166 cpp_buffer *ip;
3fef5b2b 3167
041c3194 3168 switch (node->type)
3fef5b2b 3169 {
041c3194
ZW
3170 case T_FILE:
3171 case T_BASE_FILE:
3fef5b2b 3172 {
041c3194 3173 const char *file;
3fef5b2b 3174
041c3194
ZW
3175 ip = CPP_BUFFER (pfile);
3176 if (ip == 0)
3177 file = "";
3fef5b2b 3178 else
041c3194
ZW
3179 {
3180 if (node->type == T_BASE_FILE)
3181 while (CPP_PREV_BUFFER (ip) != NULL)
3182 ip = CPP_PREV_BUFFER (ip);
3183
3184 file = ip->nominal_fname;
3185 }
3186 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3187 strlen (file));
3188 }
3189 break;
3fef5b2b 3190
041c3194 3191 case T_INCLUDE_LEVEL:
f9a0e96c
ZW
3192 /* pfile->include_depth counts the primary source as level 1,
3193 but historically __INCLUDE_DEPTH__ has called the primary
3194 source level 0. */
3195 result = alloc_number_token (pfile, pfile->include_depth - 1);
3fef5b2b
NB
3196 break;
3197
041c3194
ZW
3198 case T_SPECLINE:
3199 /* If __LINE__ is embedded in a macro, it must expand to the
3200 line of the macro's invocation, not its definition.
3201 Otherwise things like assert() will not work properly. */
3202 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
5d7ee2fa
NB
3203 break;
3204
041c3194 3205 case T_STDC:
3fef5b2b 3206 {
041c3194 3207 int stdc = 1;
3fef5b2b 3208
041c3194
ZW
3209#ifdef STDC_0_IN_SYSTEM_HEADERS
3210 if (CPP_IN_SYSTEM_HEADER (pfile)
bfb9dc7f 3211 && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
041c3194
ZW
3212 stdc = 0;
3213#endif
3214 result = alloc_number_token (pfile, stdc);
3fef5b2b
NB
3215 }
3216 break;
3217
041c3194
ZW
3218 case T_DATE:
3219 case T_TIME:
3220 if (pfile->date == 0)
3221 {
3222 /* Allocate __DATE__ and __TIME__ from permanent storage,
3223 and save them in pfile so we don't have to do this again.
3224 We don't generate these strings at init time because
3225 time() and localtime() are very slow on some systems. */
3226 time_t tt = time (NULL);
3227 struct tm *tb = localtime (&tt);
3228
3229 pfile->date = make_string_token
3230 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3231 pfile->time = make_string_token
3232 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3233
bfb9dc7f 3234 sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
041c3194 3235 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
bfb9dc7f 3236 sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
041c3194
ZW
3237 tb->tm_hour, tb->tm_min, tb->tm_sec);
3238 }
3239 result = node->type == T_DATE ? pfile->date: pfile->time;
3fef5b2b
NB
3240 break;
3241
041c3194 3242 case T_POISON:
bfb9dc7f 3243 cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
041c3194
ZW
3244 return token;
3245
3246 default:
3247 cpp_ice (pfile, "invalid special hash type");
3248 return token;
3fef5b2b
NB
3249 }
3250
041c3194
ZW
3251 ASSIGN_FLAGS_AND_POS (result, token);
3252 return result;
3253}
3254#undef DSC
3255
3256/* Dump the original user's spelling of argument index ARG_NO to the
3257 macro whose expansion is LIST. */
3258static void
3259dump_param_spelling (pfile, list, arg_no)
3260 cpp_reader *pfile;
3261 const cpp_toklist *list;
3262 unsigned int arg_no;
3263{
3264 const U_CHAR *param = list->namebuf;
3265
3266 while (arg_no--)
3267 param += ustrlen (param) + 1;
3268 CPP_PUTS (pfile, param, ustrlen (param));
3fef5b2b
NB
3269}
3270
041c3194 3271/* Dump a token list to the output. */
c5a04734 3272void
041c3194
ZW
3273_cpp_dump_list (pfile, list, token, flush)
3274 cpp_reader *pfile;
3275 const cpp_toklist *list;
3276 const cpp_token *token;
3277 int flush;
c5a04734 3278{
041c3194
ZW
3279 const cpp_token *limit = list->tokens + list->tokens_used;
3280 const cpp_token *prev = 0;
c5a04734 3281
041c3194
ZW
3282 /* Avoid the CPP_EOF. */
3283 if (list->directive)
3284 limit--;
c5a04734 3285
041c3194 3286 while (token < limit)
c5a04734 3287 {
041c3194
ZW
3288 if (token->type == CPP_MACRO_ARG)
3289 {
3290 if (token->flags & PREV_WHITE)
3291 CPP_PUTC (pfile, ' ');
3292 if (token->flags & STRINGIFY_ARG)
3293 CPP_PUTC (pfile, '#');
3294 dump_param_spelling (pfile, list, token->val.aux);
3295 }
c5a04734 3296 else
041c3194
ZW
3297 output_token (pfile, token, prev);
3298 if (token->flags & PASTE_LEFT)
3299 CPP_PUTS (pfile, " ##", 3);
3300 prev = token;
3301 token++;
c5a04734 3302 }
041c3194
ZW
3303
3304 if (flush && pfile->printer)
3305 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
c5a04734
ZW
3306}
3307
041c3194
ZW
3308/* Allocate pfile->input_buffer, and initialize trigraph_map[]
3309 if it hasn't happened already. */
3310
3311void
3312_cpp_init_input_buffer (pfile)
3313 cpp_reader *pfile;
3314{
417f3e3a
ZW
3315 cpp_context *base;
3316
041c3194 3317 init_trigraph_map ();
417f3e3a
ZW
3318 _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3319 pfile->no_expand_level = UINT_MAX;
041c3194 3320 pfile->context_cap = 20;
041c3194 3321 pfile->cur_context = 0;
041c3194 3322
417f3e3a
ZW
3323 pfile->contexts = (cpp_context *)
3324 xmalloc (pfile->context_cap * sizeof (cpp_context));
041c3194 3325
417f3e3a
ZW
3326 /* Clear the base context. */
3327 base = &pfile->contexts[0];
3328 base->u.list = &pfile->token_list;
3329 base->posn = 0;
3330 base->count = 0;
3331 base->args = 0;
3332 base->level = 0;
3333 base->flags = 0;
3334 base->pushed_token = 0;
041c3194
ZW
3335}
3336
3337/* Moves to the end of the directive line, popping contexts as
3338 necessary. */
3339void
3340_cpp_skip_rest_of_line (pfile)
3341 cpp_reader *pfile;
3342{
417f3e3a
ZW
3343 /* Discard all stacked contexts. */
3344 int i;
3345 for (i = pfile->cur_context; i > 0; i--)
3346 if (pfile->contexts[i].args)
3347 free_macro_args (pfile->contexts[i].args);
3348
3349 if (pfile->no_expand_level <= pfile->cur_context)
3350 pfile->no_expand_level = 0;
3351 pfile->cur_context = 0;
041c3194 3352
417f3e3a
ZW
3353 /* Clear the base context, and clear the directive pointer so that
3354 get_raw_token will advance to the next line. */
3355 pfile->contexts[0].count = 0;
3356 pfile->contexts[0].posn = 0;
041c3194 3357 pfile->token_list.directive = 0;
c5a04734
ZW
3358}
3359
041c3194
ZW
3360/* Directive handler wrapper used by the command line option
3361 processor. */
3362void
3363_cpp_run_directive (pfile, dir, buf, count)
3364 cpp_reader *pfile;
3365 const struct directive *dir;
3366 const char *buf;
3367 size_t count;
3368{
3369 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3370 {
3371 unsigned int prev_lvl = 0;
417f3e3a
ZW
3372
3373 /* Scan the line now, else prevent_macro_expansion won't work. */
3374 lex_next (pfile, 1);
041c3194
ZW
3375 if (! (dir->flags & EXPAND))
3376 prev_lvl = prevent_macro_expansion (pfile);
3377
3378 (void) (*dir->handler) (pfile);
3379
3380 if (! (dir->flags & EXPAND))
3381 restore_macro_expansion (pfile, prev_lvl);
3382
3383 _cpp_skip_rest_of_line (pfile);
3384 cpp_pop_buffer (pfile);
3385 }
3386}
This page took 0.582835 seconds and 5 git commands to generate.