]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
combine.c (simplify_comparison): Shift a NOT out of a single bit test.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
041c3194
ZW
23/*
24
25Cleanups to do:-
26
041c3194 27o Check line numbers assigned to all errors.
041c3194 28o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
041c3194
ZW
29o Distinguish integers, floats, and 'other' pp-numbers.
30o Store ints and char constants as binary values.
31o New command-line assertion syntax.
041c3194
ZW
32o Work towards functions in cpperror.c taking a message level parameter.
33 If we do this, merge the common code of do_warning and do_error.
34o Comment all functions, and describe macro expansion algorithm.
35o Move as much out of header files as possible.
36o Remove single quote pairs `', and some '', from diagnostics.
37o Correct pastability test for CPP_NAME and CPP_NUMBER.
38
39*/
40
45b966db
ZW
41#include "config.h"
42#include "system.h"
43#include "intl.h"
44#include "cpplib.h"
45#include "cpphash.h"
041c3194 46#include "symcat.h"
45b966db 47
f9a0e96c
ZW
48static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
49static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
50
51/* Flags for cpp_context. */
52#define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
53#define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
54#define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
55#define CONTEXT_ARG (1 << 3) /* If an argument context. */
56
57typedef struct cpp_context cpp_context;
58struct cpp_context
59{
60 union
61 {
62 const cpp_toklist *list; /* Used for macro contexts only. */
63 const cpp_token **arg; /* Used for arg contexts only. */
64 } u;
65
66 /* Pushed token to be returned by next call to get_raw_token. */
67 const cpp_token *pushed_token;
68
f67798e7
NB
69 struct macro_args *args; /* The arguments for a function-like
70 macro. NULL otherwise. */
f9a0e96c
ZW
71 unsigned short posn; /* Current posn, index into u. */
72 unsigned short count; /* No. of tokens in u. */
73 unsigned short level;
74 unsigned char flags;
75};
76
77typedef struct macro_args macro_args;
78struct macro_args
79{
80 unsigned int *ends;
81 const cpp_token **tokens;
82 unsigned int capacity;
83 unsigned int used;
84 unsigned short level;
85};
86
87static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
88static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
89 macro_args *, unsigned int *));
90static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
91static void save_token PARAMS ((macro_args *, const cpp_token *));
92static int pop_context PARAMS ((cpp_reader *));
93static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
94static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
95static void free_macro_args PARAMS ((macro_args *));
96
041c3194
ZW
/* Enlarge LIST's name space by one byte more than half of its
   current capacity.  */
#define auto_expand_name_space(list) \
  _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
58fea6af 99static void dump_param_spelling PARAMS ((FILE *, const cpp_toklist *,
1368ee70 100 unsigned int));
041c3194 101static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
9e62c811 102 unsigned int));
f2d5f0cc 103
041c3194
ZW
104static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
105static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
b8f41010
NB
106 unsigned char *));
107static const unsigned char *backslash_start PARAMS ((cpp_reader *,
108 const unsigned char *));
041c3194
ZW
109static int skip_block_comment PARAMS ((cpp_reader *));
110static int skip_line_comment PARAMS ((cpp_reader *));
52fadca8 111static void adjust_column PARAMS ((cpp_reader *, const U_CHAR *));
b8f41010 112static void skip_whitespace PARAMS ((cpp_reader *, int));
bfb9dc7f
ZW
113static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
114 const U_CHAR *, const U_CHAR *));
115static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
041c3194
ZW
116static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
117 unsigned int));
b8f41010 118static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
f624ffa7
NB
119static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
120 const unsigned char *,
ad265aa4 121 unsigned int, unsigned int));
041c3194
ZW
122static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
123static int lex_next PARAMS ((cpp_reader *, int));
124static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
125 const cpp_token *));
b8f41010 126
041c3194
ZW
127static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
128static void expand_context_stack PARAMS ((cpp_reader *));
d1d9a6bd 129static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
041c3194 130 unsigned char *));
58fea6af
ZW
131static void output_token PARAMS ((cpp_reader *, FILE *, const cpp_token *,
132 const cpp_token *, int));
b8f41010
NB
133typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
134 cpp_token *));
041c3194
ZW
135static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
136 unsigned int));
137static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
138static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
139 const cpp_token *));
140static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
141static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
142 const cpp_token *));
143static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
144 const cpp_token *, int *));
145static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
146static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
147static cpp_token *get_temp_token PARAMS ((cpp_reader *));
148static void release_temp_tokens PARAMS ((cpp_reader *));
149static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
150static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
b8f41010 151
bfb9dc7f
ZW
/* Give TOKEN an empty string value whose text pointer is the first
   unused byte of LIST's name buffer.  */
#define INIT_TOKEN_STR(list, token) \
  do { \
    (token)->val.str.len = 0; \
    (token)->val.str.text = (list)->namebuf + (list)->name_used; \
  } while (0)

/* Non-zero if C is '+' or '-' and PREVC is 'e' or 'E', or is 'p' or
   'P' while the c89 option is off.  Expects a variable named pfile
   to be in scope at the point of use.  */
#define VALID_SIGN(c, prevc) \
  (((c) == '+' || (c) == '-') && \
   ((prevc) == 'e' || (prevc) == 'E' \
    || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
161
b8f41010
NB
/* Handle LF, CR, CR-LF and LF-CR style newlines.  C is the newline
   character just consumed and CUR points at the character after it;
   if that character is the other member of the CR/LF pair it is
   consumed too.  The line number is then bumped, the line base is
   moved to CUR and the column adjustment is reset.  Assumes next
   character, if any, is in buffer, and that a variable named pfile
   is in scope.  CUR is evaluated more than once and must be an
   lvalue.  */

#define handle_newline(cur, limit, c) \
  do { \
    /* '\r' + '\n' - (c) is '\n' when C is '\r' and vice versa.  */ \
    if ((cur) < (limit) && *(cur) == '\r' + '\n' - (c)) \
      (cur)++; \
    pfile->buffer->lineno++; \
    pfile->buffer->line_base = (cur); \
    pfile->col_adjust = 0; \
  } while (0)
b8f41010 173
/* The macros below all operate on a variable named cur_token that
   must be in scope where they are used.  */

/* Non-zero if the token at cur_token lacks the PREV_WHITE flag.  */
#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))

/* Type of the token immediately before cur_token.  */
#define PREV_TOKEN_TYPE (cur_token[-1].type)

/* Set the type of the token at cur_token and step past it.  */
#define PUSH_TOKEN(ttype) cur_token++->type = (ttype)

/* Change the type of the token immediately before cur_token.  */
#define REVISE_TOKEN(ttype) cur_token[-1].type = (ttype)

/* Step cur_token back by one and set that token's type.  */
#define BACKUP_TOKEN(ttype) (--cur_token)->type = (ttype)

/* As BACKUP_TOKEN, and additionally set the DIGRAPH flag.  */
#define BACKUP_DIGRAPH(ttype) do { \
  BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
182
183/* An upper bound on the number of bytes needed to spell a token,
184 including preceding whitespace. */
58fea6af
ZW
185static inline size_t TOKEN_LEN PARAMS ((const cpp_token *));
186static inline size_t
187TOKEN_LEN (token)
188 const cpp_token *token;
189{
190 size_t len;
191
192 switch (TOKEN_SPELL (token))
193 {
194 default: len = 0; break;
195 case SPELL_STRING: len = token->val.str.len; break;
196 case SPELL_IDENT: len = token->val.node->length; break;
197 }
198 return len + 5;
199}
f617b8e2 200
f9a0e96c
ZW
/* Non-zero if context C has the CONTEXT_ARG flag set.  */
#define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)

/* The entry of PFILE's context array selected by cur_context.  */
#define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)

/* Non-zero if C's token list is flagged VAR_ARGS and the token just
   before C's current position has val.aux equal to the index of the
   list's last parameter.  */
#define ON_REST_ARG(c) \
 (((c)->u.list->flags & VAR_ARGS) \
  && (c)->u.list->tokens[(c)->posn - 1].val.aux \
      == (unsigned int) ((c)->u.list->paramc - 1))

/* Set D's flags to S's PREV_WHITE, BOL and PASTE_LEFT bits; if BOL
   is among them, copy S's column and line as well.  */
#define ASSIGN_FLAGS_AND_POS(d, s) \
  do { \
    (d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
    if ((d)->flags & BOL) \
      {(d)->col = (s)->col; (d)->line = (s)->line;} \
  } while (0)

/* Replace D's PREV_WHITE and BOL bits with F, which consists only of
   those two bits; if F includes BOL, copy S's column and line.  */
#define MODIFY_FLAGS_AND_POS(d, s, f) \
  do { \
    (d)->flags &= ~(PREV_WHITE | BOL); \
    (d)->flags |= (f); \
    if ((f) & BOL) \
      {(d)->col = (s)->col; (d)->line = (s)->line;} \
  } while (0)
218
96be6998
ZW
219#define OP(e, s) { SPELL_OPERATOR, U s },
220#define TK(e, s) { s, U STRINGX (e) },
b8f41010 221
041c3194 222const struct token_spelling
96be6998 223_cpp_token_spellings [N_TTYPES] = {TTYPE_TABLE };
041c3194 224
96be6998
ZW
225#undef OP
226#undef TK
b8f41010 227
f2d5f0cc
ZW
228/* Notify the compiler proper that the current line number has jumped,
229 or the current file name has changed. */
230
231static void
1368ee70 232output_line_command (pfile, print, line)
45b966db 233 cpp_reader *pfile;
f2d5f0cc 234 cpp_printer *print;
1368ee70 235 unsigned int line;
45b966db 236{
041c3194 237 cpp_buffer *ip = CPP_BUFFER (pfile);
f2d5f0cc 238
041c3194
ZW
239 if (line == 0)
240 return;
241
242 /* End the previous line of text. */
243 if (pfile->need_newline)
58fea6af
ZW
244 {
245 putc ('\n', print->outf);
246 print->lineno++;
247 }
041c3194
ZW
248 pfile->need_newline = 0;
249
f2d5f0cc
ZW
250 if (CPP_OPTION (pfile, no_line_commands))
251 return;
252
f2d5f0cc
ZW
253 /* If the current file has not changed, we can output a few newlines
254 instead if we want to increase the line number by a small amount.
255 We cannot do this if print->lineno is zero, because that means we
256 haven't output any line commands yet. (The very first line
58fea6af
ZW
257 command output is a `same_file' command.)
258
259 'nominal_fname' values are unique, so they can be compared by
260 comparing pointers. */
261 if (ip->nominal_fname == print->last_fname && print->lineno > 0
f2d5f0cc 262 && line >= print->lineno && line < print->lineno + 8)
45b966db 263 {
f2d5f0cc 264 while (line > print->lineno)
45b966db 265 {
f2d5f0cc
ZW
266 putc ('\n', print->outf);
267 print->lineno++;
45b966db 268 }
f2d5f0cc 269 return;
45b966db 270 }
f2d5f0cc 271
58fea6af
ZW
272 fprintf (print->outf, "# %u \"%s\"%s\n", line, ip->nominal_fname,
273 cpp_syshdr_flags (pfile, ip));
274
275 print->last_fname = ip->nominal_fname;
f2d5f0cc
ZW
276 print->lineno = line;
277}
278
58fea6af
ZW
279/* Like fprintf, but writes to a printer object. You should be sure
280 always to generate a complete line when you use this function. */
f2d5f0cc 281void
58fea6af
ZW
282cpp_printf VPARAMS ((cpp_reader *pfile, cpp_printer *print,
283 const char *fmt, ...))
284{
285 va_list ap;
286#ifndef ANSI_PROTOTYPES
287 cpp_reader *pfile;
288 cpp_printer *print;
289 const char *fmt;
290#endif
45b966db 291
58fea6af
ZW
292 VA_START (ap, fmt);
293
294#ifndef ANSI_PROTOTYPES
295 pfile = va_arg (ap, cpp_reader *);
296 print = va_arg (ap, cpp_printer *);
297 fmt = va_arg (ap, const char *);
298#endif
299
300 /* End the previous line of text. */
301 if (pfile->need_newline)
3cb553b4
ZW
302 {
303 putc ('\n', print->outf);
304 print->lineno++;
305 }
58fea6af
ZW
306 pfile->need_newline = 0;
307
308 vfprintf (print->outf, fmt, ap);
309 va_end (ap);
45b966db
ZW
310}
311
c56c2073 312/* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
f2d5f0cc
ZW
313
314void
315cpp_scan_buffer_nooutput (pfile)
316 cpp_reader *pfile;
317{
041c3194 318 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
2c0accc9 319 const cpp_token *token;
041c3194 320
2c0accc9 321 /* In no-output mode, we can ignore everything but directives. */
f2d5f0cc
ZW
322 for (;;)
323 {
2c0accc9
ZW
324 token = _cpp_get_token (pfile);
325
041c3194
ZW
326 if (token->type == CPP_EOF)
327 {
328 cpp_pop_buffer (pfile);
329 if (CPP_BUFFER (pfile) == stop)
330 break;
331 }
2c0accc9
ZW
332
333 if (token->type == CPP_HASH && token->flags & BOL
334 && pfile->token_list.directive)
335 {
336 process_directive (pfile, token);
337 continue;
338 }
339
041c3194 340 _cpp_skip_rest_of_line (pfile);
f2d5f0cc 341 }
f2d5f0cc
ZW
342}
343
c56c2073 344/* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
f2d5f0cc
ZW
345void
346cpp_scan_buffer (pfile, print)
347 cpp_reader *pfile;
348 cpp_printer *print;
349{
c56c2073 350 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
041c3194 351 const cpp_token *token, *prev = 0;
f2d5f0cc
ZW
352
353 for (;;)
354 {
2c0accc9 355 token = _cpp_get_token (pfile);
041c3194 356 if (token->type == CPP_EOF)
f2d5f0cc 357 {
041c3194 358 cpp_pop_buffer (pfile);
d0089985 359
041c3194 360 if (CPP_BUFFER (pfile) == stop)
f2d5f0cc 361 return;
2c0accc9 362
041c3194
ZW
363 prev = 0;
364 continue;
365 }
366
367 if (token->flags & BOL)
368 {
3cb553b4
ZW
369 output_line_command (pfile, print, pfile->token_list.line);
370 prev = 0;
371
2c0accc9
ZW
372 if (token->type == CPP_HASH && pfile->token_list.directive)
373 {
374 process_directive (pfile, token);
375 continue;
376 }
f2d5f0cc 377 }
041c3194 378
2c0accc9 379 if (token->type != CPP_PLACEMARKER)
f9a0e96c 380 {
58fea6af
ZW
381 output_token (pfile, print->outf, token, prev, 1);
382 pfile->need_newline = 1;
f9a0e96c
ZW
383 }
384
f9a0e96c
ZW
385 prev = token;
386 }
f9a0e96c
ZW
387}
388
041c3194
ZW
389/* Helper routine used by parse_include, which can't see spell_token.
390 Reinterpret the current line as an h-char-sequence (< ... >); we are
391 looking at the first token after the <. */
392const cpp_token *
393_cpp_glue_header_name (pfile)
45b966db
ZW
394 cpp_reader *pfile;
395{
041c3194
ZW
396 const cpp_token *t;
397 cpp_token *hdr;
58fea6af
ZW
398 U_CHAR *buf, *p;
399 size_t len, avail;
400
401 avail = 40;
402 len = 0;
403 buf = xmalloc (avail);
041c3194
ZW
404
405 for (;;)
406 {
417f3e3a 407 t = _cpp_get_token (pfile);
041c3194
ZW
408 if (t->type == CPP_GREATER || t->type == CPP_EOF)
409 break;
410
58fea6af
ZW
411 if (len + TOKEN_LEN (t) > avail)
412 {
413 avail = len + TOKEN_LEN (t) + 40;
414 buf = xrealloc (buf, avail);
415 }
416
041c3194 417 if (t->flags & PREV_WHITE)
58fea6af
ZW
418 buf[len++] = ' ';
419
420 p = spell_token (pfile, t, buf + len);
421 len = (size_t) (p - buf); /* p known >= buf */
041c3194
ZW
422 }
423
424 if (t->type == CPP_EOF)
425 cpp_error (pfile, "missing terminating > character");
45b966db 426
58fea6af 427 buf = xrealloc (buf, len);
041c3194
ZW
428
429 hdr = get_temp_token (pfile);
430 hdr->type = CPP_HEADER_NAME;
431 hdr->flags = 0;
bfb9dc7f
ZW
432 hdr->val.str.text = buf;
433 hdr->val.str.len = len;
041c3194 434 return hdr;
45b966db
ZW
435}
436
1368ee70
ZW
437/* Token-buffer helper functions. */
438
d1d9a6bd
NB
439/* Expand a token list's string space. It is *vital* that
440 list->tokens_used is correct, to get pointer fix-up right. */
041c3194
ZW
441void
442_cpp_expand_name_space (list, len)
1368ee70 443 cpp_toklist *list;
c5a04734
ZW
444 unsigned int len;
445{
f617b8e2 446 const U_CHAR *old_namebuf;
f617b8e2
NB
447
448 old_namebuf = list->namebuf;
c5a04734
ZW
449 list->name_cap += len;
450 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
f617b8e2
NB
451
452 /* Fix up token text pointers. */
79f50f2a 453 if (list->namebuf != old_namebuf)
f617b8e2
NB
454 {
455 unsigned int i;
456
457 for (i = 0; i < list->tokens_used; i++)
96be6998 458 if (TOKEN_SPELL (&list->tokens[i]) == SPELL_STRING)
bfb9dc7f 459 list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
f617b8e2 460 }
1368ee70
ZW
461}
462
041c3194
ZW
463/* If there is not enough room for LEN more characters, expand the
464 list by just enough to have room for LEN characters. */
465void
466_cpp_reserve_name_space (list, len)
467 cpp_toklist *list;
468 unsigned int len;
469{
470 unsigned int room = list->name_cap - list->name_used;
471
472 if (room < len)
473 _cpp_expand_name_space (list, len - room);
474}
475
1368ee70 476/* Expand the number of tokens in a list. */
d1d9a6bd
NB
477void
478_cpp_expand_token_space (list, count)
1368ee70 479 cpp_toklist *list;
d1d9a6bd 480 unsigned int count;
1368ee70 481{
d1d9a6bd
NB
482 unsigned int n;
483
484 list->tokens_cap += count;
485 n = list->tokens_cap;
15dad1d9 486 if (list->flags & LIST_OFFSET)
d1d9a6bd 487 list->tokens--, n++;
1368ee70 488 list->tokens = (cpp_token *)
d1d9a6bd 489 xrealloc (list->tokens, n * sizeof (cpp_token));
15dad1d9
ZW
490 if (list->flags & LIST_OFFSET)
491 list->tokens++; /* Skip the dummy. */
1368ee70
ZW
492}
493
d1d9a6bd
NB
494/* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
495 an extra token in front of the token list, as this allows the lexer
496 to always peek at the previous token without worrying about
497 underflowing the list, and some initial space. Otherwise, no
498 token- or name-space is allocated, and there is no dummy token. */
15dad1d9 499void
d1d9a6bd 500_cpp_init_toklist (list, flags)
1368ee70 501 cpp_toklist *list;
d1d9a6bd 502 int flags;
1368ee70 503{
d1d9a6bd
NB
504 if (flags == NO_DUMMY_TOKEN)
505 {
506 list->tokens_cap = 0;
041c3194 507 list->tokens = 0;
d1d9a6bd 508 list->name_cap = 0;
041c3194 509 list->namebuf = 0;
d1d9a6bd
NB
510 list->flags = 0;
511 }
512 else
513 {
514 /* Initialize token space. Put a dummy token before the start
515 that will fail matches. */
516 list->tokens_cap = 256; /* 4K's worth. */
517 list->tokens = (cpp_token *)
518 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
519 list->tokens[0].type = CPP_EOF;
520 list->tokens++;
521
522 /* Initialize name space. */
523 list->name_cap = 1024;
041c3194 524 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
d1d9a6bd
NB
525 list->flags = LIST_OFFSET;
526 }
15dad1d9 527
15dad1d9
ZW
528 _cpp_clear_toklist (list);
529}
1368ee70 530
15dad1d9
ZW
531/* Clear a token list. */
532void
533_cpp_clear_toklist (list)
534 cpp_toklist *list;
535{
c5a04734
ZW
536 list->tokens_used = 0;
537 list->name_used = 0;
041c3194
ZW
538 list->directive = 0;
539 list->paramc = 0;
540 list->params_len = 0;
15dad1d9
ZW
541 list->flags &= LIST_OFFSET; /* clear all but that one */
542}
543
544/* Free a token list. Does not free the list itself, which may be
545 embedded in a larger structure. */
546void
547_cpp_free_toklist (list)
041c3194 548 const cpp_toklist *list;
15dad1d9 549{
15dad1d9
ZW
550 if (list->flags & LIST_OFFSET)
551 free (list->tokens - 1); /* Backup over dummy token. */
552 else
553 free (list->tokens);
554 free (list->namebuf);
1368ee70
ZW
555}
556
15dad1d9
ZW
557/* Compare two tokens. */
558int
559_cpp_equiv_tokens (a, b)
560 const cpp_token *a, *b;
561{
041c3194 562 if (a->type == b->type && a->flags == b->flags)
96be6998 563 switch (TOKEN_SPELL (a))
041c3194
ZW
564 {
565 default: /* Keep compiler happy. */
566 case SPELL_OPERATOR:
567 return 1;
568 case SPELL_CHAR:
569 case SPELL_NONE:
570 return a->val.aux == b->val.aux; /* arg_no or character. */
571 case SPELL_IDENT:
bfb9dc7f 572 return a->val.node == b->val.node;
041c3194 573 case SPELL_STRING:
bfb9dc7f
ZW
574 return (a->val.str.len == b->val.str.len
575 && !memcmp (a->val.str.text, b->val.str.text,
576 a->val.str.len));
041c3194 577 }
15dad1d9 578
041c3194 579 return 0;
15dad1d9
ZW
580}
581
582/* Compare two token lists. */
583int
584_cpp_equiv_toklists (a, b)
585 const cpp_toklist *a, *b;
586{
587 unsigned int i;
588
041c3194
ZW
589 if (a->tokens_used != b->tokens_used
590 || a->flags != b->flags
591 || a->paramc != b->paramc)
15dad1d9
ZW
592 return 0;
593
594 for (i = 0; i < a->tokens_used; i++)
595 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
596 return 0;
597 return 1;
598}
599
041c3194 600/* Utility routine:
9e62c811 601
bfb9dc7f
ZW
602 Compares, the token TOKEN to the NUL-terminated string STRING.
603 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
15dad1d9 604
041c3194 605int
bfb9dc7f
ZW
606cpp_ideq (token, string)
607 const cpp_token *token;
041c3194
ZW
608 const char *string;
609{
bfb9dc7f 610 if (token->type != CPP_NAME)
041c3194 611 return 0;
bfb9dc7f
ZW
612
613 return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
15dad1d9 614}
1368ee70 615
041c3194 616/* Lexing algorithm.
45b966db 617
041c3194
ZW
618 The original lexer in cpplib was made up of two passes: a first pass
619 that replaced trigraphs and deleted escaped newlines, and a second
620 pass that tokenized the result of the first pass. Tokenisation was
621 performed by peeking at the next character in the input stream. For
622 example, if the input stream contained "!=", the handler for the !
623 character would peek at the next character, and if it were a '='
624 would skip over it, and return a "!=" token, otherwise it would
625 return just the "!" token.
61474454 626
041c3194
ZW
627 To implement a single-pass lexer, this peeking ahead is unworkable.
628 An arbitrary number of escaped newlines, and trigraphs (in particular
629 ??/ which translates to the escape \), could separate the '!' and '='
630 in the input stream, yet the next token is still a "!=".
61474454 631
041c3194
ZW
632 Suppose instead that we lex by one logical line at a time, producing
633 a token list or stack for each logical line, and when seeing the '!'
634 push a CPP_NOT token on the list. Then if the '!' is part of a
635 longer token ("!=") we know we must see the remainder of the token by
636 the time we reach the end of the logical line. Thus we can have the
637 '=' handler look at the previous token (at the end of the list / top
638 of the stack) and see if it is a "!" token, and if so, instead of
639 pushing a "=" token revise the existing token to be a "!=" token.
61474454 640
041c3194
ZW
641 This works in the presence of escaped newlines, because the '\' would
642 have been pushed on the top of the stack as a CPP_BACKSLASH. The
643 newline ('\n' or '\r') handler looks at the token at the top of the
644 stack to see if it is a CPP_BACKSLASH, and if so discards both.
f67798e7 645 Hence the '=' handler would never see any intervening tokens.
45b966db 646
041c3194
ZW
647 To make trigraphs work in this context, as in precedence trigraphs
648 are highest and converted before anything else, the '?' handler does
649 lookahead to see if it is a trigraph, and if so skips the trigraph
650 and pushes the token it represents onto the top of the stack. This
651 also works in the particular case of a CPP_BACKSLASH trigraph.
45b966db 652
041c3194
ZW
653 To the preprocessor, whitespace is only significant to the point of
654 knowing whether whitespace precedes a particular token. For example,
655 the '=' handler needs to know whether there was whitespace between it
656 and a "!" token on the top of the stack, to make the token conversion
657 decision correctly. So each token has a PREV_WHITE flag to
658 indicate this - the standard permits consecutive whitespace to be
659 regarded as a single space. The compiler front ends are not
660 interested in whitespace at all; they just require a token stream.
661 Another place where whitespace is significant to the preprocessor is
662 a #define statement - if there is whitespace between the macro name
663 and an initial "(" token the macro is "object-like", otherwise it is
664 a function-like macro that takes arguments.
665
666 However, all is not rosy. Parsing of identifiers, numbers, comments
667 and strings becomes trickier because of the possibility of raw
668 trigraphs and escaped newlines in the input stream.
669
670 The trigraphs are three consecutive characters beginning with two
671 question marks. A question mark is not valid as part of a number or
672 identifier, so parsing of a number or identifier terminates normally
673 upon reaching it, returning to the mainloop which handles the
674 trigraph just like it would in any other position. Similarly for the
675 backslash of a backslash-newline combination. So we just need the
676 escaped-newline dropper in the mainloop to check if the token on the
677 top of the stack after dropping the escaped newline is a number or
678 identifier, and if so to continue the processing it as if nothing had
679 happened.
680
681 For strings, we replace trigraphs whenever we reach a quote or
682 newline, because there might be a backslash trigraph escaping them.
683 We need to be careful that we start trigraph replacing from where we
684 left off previously, because it is possible for a first scan to leave
685 "fake" trigraphs that a second scan would pick up as real (e.g. the
686 sequence "????/\n=" would find a fake ??= trigraph after removing the
687 escaped newline.)
688
689 For line comments, on reaching a newline we scan the previous
690 character(s) to see if it escaped, and continue if it is. Block
691 comments ignore everything and just focus on finding the comment
692 termination mark. The only difficult thing, and it is surprisingly
693 tricky, is checking if an asterisk precedes the final slash since
694 they could be separated by escaped newlines. If the preprocessor is
695 invoked with the output comments option, we don't bother removing
696 escaped newlines and replacing trigraphs for output.
697
698 Finally, numbers can begin with a period, which is pushed initially
699 as a CPP_DOT token in its own right. The digit handler checks if the
700 previous token was a CPP_DOT not separated by whitespace, and if so
701 pops it off the stack and pushes a period into the number's buffer
702 before calling the number parser.
45b966db 703
041c3194
ZW
704*/
705
/* Spellings of the six digraph tokens.  */
static const unsigned char *digraph_spellings [] = {
  U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>"
};
45b966db 708
041c3194
ZW
709/* Call when a trigraph is encountered. It warns if necessary, and
710 returns true if the trigraph should be honoured. END is the third
711 character of a trigraph in the input stream. */
45b966db 712static int
041c3194 713trigraph_ok (pfile, end)
45b966db 714 cpp_reader *pfile;
041c3194 715 const unsigned char *end;
45b966db 716{
041c3194
ZW
717 int accept = CPP_OPTION (pfile, trigraphs);
718
719 if (CPP_OPTION (pfile, warn_trigraphs))
45b966db 720 {
041c3194
ZW
721 unsigned int col = end - 1 - pfile->buffer->line_base;
722 if (accept)
723 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
724 "trigraph ??%c converted to %c",
61d0346d 725 (int) *end, (int) _cpp_trigraph_map[*end]);
45b966db 726 else
041c3194
ZW
727 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
728 "trigraph ??%c ignored", (int) *end);
45b966db 729 }
041c3194 730 return accept;
45b966db
ZW
731}
732
041c3194
ZW
733/* Scan a string for trigraphs, warning or replacing them inline as
734 appropriate. When parsing a string, we must call this routine
735 before processing a newline character (if trigraphs are enabled),
736 since the newline might be escaped by a preceding backslash
737 trigraph sequence. Returns a pointer to the end of the name after
738 replacement. */
739
740static unsigned char *
741trigraph_replace (pfile, src, limit)
45b966db 742 cpp_reader *pfile;
041c3194
ZW
743 unsigned char *src;
744 unsigned char *limit;
45b966db 745{
041c3194
ZW
746 unsigned char *dest;
747
748 /* Starting with src[1], find two consecutive '?'. The case of no
749 trigraphs is streamlined. */
750
043afb2a 751 for (src++; src + 1 < limit; src += 2)
041c3194
ZW
752 {
753 if (src[0] != '?')
754 continue;
755
756 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
757 if (src[-1] == '?')
758 src--;
759 else if (src + 2 == limit || src[1] != '?')
760 continue;
45b966db 761
041c3194 762 /* Check if it really is a trigraph. */
61d0346d 763 if (_cpp_trigraph_map[src[2]] == 0)
041c3194 764 continue;
45b966db 765
041c3194
ZW
766 dest = src;
767 goto trigraph_found;
768 }
769 return limit;
45b966db 770
041c3194
ZW
771 /* Now we have a trigraph, we need to scan the remaining buffer, and
772 copy-shifting its contents left if replacement is enabled. */
773 for (; src + 2 < limit; dest++, src++)
61d0346d 774 if ((*dest = *src) == '?' && src[1] == '?' && _cpp_trigraph_map[src[2]])
041c3194
ZW
775 {
776 trigraph_found:
777 src += 2;
778 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
61d0346d 779 *dest = _cpp_trigraph_map[*src];
041c3194
ZW
780 }
781
782 /* Copy remaining (at most 2) characters. */
783 while (src < limit)
784 *dest++ = *src++;
785 return dest;
45b966db
ZW
786}
787
041c3194
ZW
788/* If CUR is a backslash or the end of a trigraphed backslash, return
789 a pointer to its beginning, otherwise NULL. We don't read beyond
790 the buffer start, because there is the start of the comment in the
791 buffer. */
792static const unsigned char *
793backslash_start (pfile, cur)
64aaf407 794 cpp_reader *pfile;
041c3194 795 const unsigned char *cur;
64aaf407 796{
041c3194
ZW
797 if (cur[0] == '\\')
798 return cur;
799 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
800 && trigraph_ok (pfile, cur))
801 return cur - 2;
802 return 0;
64aaf407
NB
803}
804
041c3194
ZW
805/* Skip a C-style block comment. This is probably the trickiest
806 handler. We find the end of the comment by seeing if an asterisk
807 is before every '/' we encounter. The nasty complication is that a
808 previous asterisk may be separated by one or more escaped newlines.
809 Returns non-zero if comment terminated by EOF, zero otherwise. */
810static int
811skip_block_comment (pfile)
45b966db
ZW
812 cpp_reader *pfile;
813{
041c3194
ZW
814 cpp_buffer *buffer = pfile->buffer;
815 const unsigned char *char_after_star = 0;
52fadca8 816 const unsigned char *cur = buffer->cur;
041c3194 817
041c3194 818 for (; cur < buffer->rlimit; )
45b966db 819 {
041c3194
ZW
820 unsigned char c = *cur++;
821
822 /* People like decorating comments with '*', so check for
823 '/' instead for efficiency. */
824 if (c == '/')
45b966db 825 {
52fadca8
NB
826 /* Don't view / then * then / as finishing the comment. */
827 if ((cur[-2] == '*' && cur - 1 > buffer->cur)
828 || cur - 1 == char_after_star)
829 {
830 buffer->cur = cur;
831 return 0;
832 }
041c3194
ZW
833
834 /* Warn about potential nested comments, but not when
835 the final character inside the comment is a '/'.
836 Don't bother to get it right across escaped newlines. */
837 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
838 && cur[0] == '*' && cur[1] != '/')
45b966db 839 {
041c3194
ZW
840 buffer->cur = cur;
841 cpp_warning (pfile, "'/*' within comment");
45b966db 842 }
45b966db 843 }
91fcd158 844 else if (is_vspace (c))
45b966db 845 {
041c3194
ZW
846 const unsigned char* bslash = backslash_start (pfile, cur - 2);
847
848 handle_newline (cur, buffer->rlimit, c);
849 /* Work correctly if there is an asterisk before an
850 arbirtrarily long sequence of escaped newlines. */
851 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
852 char_after_star = cur;
853 else
854 char_after_star = 0;
45b966db 855 }
52fadca8
NB
856 else if (c == '\t')
857 adjust_column (pfile, cur - 1);
45b966db 858 }
041c3194 859
041c3194 860 buffer->cur = cur;
52fadca8 861 return 1;
45b966db
ZW
862}
863
f9a0e96c
ZW
864/* Skip a C++ line comment. Handles escaped newlines. Returns
865 non-zero if a multiline comment. */
041c3194
ZW
866static int
867skip_line_comment (pfile)
45b966db
ZW
868 cpp_reader *pfile;
869{
041c3194
ZW
870 cpp_buffer *buffer = pfile->buffer;
871 register const unsigned char *cur = buffer->cur;
872 int multiline = 0;
873
874 for (; cur < buffer->rlimit; )
875 {
876 unsigned char c = *cur++;
877
91fcd158 878 if (is_vspace (c))
45b966db 879 {
041c3194
ZW
880 /* Check for a (trigaph?) backslash escaping the newline. */
881 if (!backslash_start (pfile, cur - 2))
882 goto out;
883 multiline = 1;
884 handle_newline (cur, buffer->rlimit, c);
885 }
886 }
887 cur++;
45b966db 888
041c3194
ZW
889 out:
890 buffer->cur = cur - 1; /* Leave newline for caller. */
891 return multiline;
892}
45b966db 893
52fadca8
NB
894/* TAB points to a \t character. Update col_adjust so we track the
895 column correctly. */
896static void
897adjust_column (pfile, tab)
898 cpp_reader *pfile;
899 const U_CHAR *tab;
900{
901 /* Zero-based column. */
902 unsigned int col = CPP_BUF_COLUMN (pfile->buffer, tab);
903
904 /* Round it up to multiple of the tabstop, but subtract 1 since the
905 tab itself occupies a character position. */
906 pfile->col_adjust += (CPP_OPTION (pfile, tabstop)
907 - col % CPP_OPTION (pfile, tabstop)) - 1;
908}
909
041c3194
ZW
910/* Skips whitespace, stopping at next non-whitespace character.
911 Adjusts pfile->col_adjust to account for tabs. This enables tokens
912 to be assigned the correct column. */
913static void
914skip_whitespace (pfile, in_directive)
915 cpp_reader *pfile;
916 int in_directive;
917{
918 cpp_buffer *buffer = pfile->buffer;
91fcd158 919 unsigned short warned = 0;
45b966db 920
91fcd158
NB
921 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
922 while (buffer->cur < buffer->rlimit)
041c3194 923 {
91fcd158 924 unsigned char c = *buffer->cur;
45b966db 925
91fcd158
NB
926 if (!is_nvspace (c))
927 break;
928
929 buffer->cur++;
930 /* Horizontal space always OK. */
931 if (c == ' ')
932 continue;
933 else if (c == '\t')
52fadca8 934 adjust_column (pfile, buffer->cur - 1);
91fcd158
NB
935 /* Must be \f \v or \0. */
936 else if (c == '\0')
041c3194 937 {
91fcd158
NB
938 if (!warned)
939 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
940 CPP_BUF_COL (buffer),
941 "embedded null character ignored");
942 warned = 1;
45b966db 943 }
041c3194 944 else if (in_directive && CPP_PEDANTIC (pfile))
91fcd158
NB
945 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
946 CPP_BUF_COL (buffer),
947 "%s in preprocessing directive",
948 c == '\f' ? "form feed" : "vertical tab");
45b966db 949 }
041c3194 950}
45b966db 951
711b8824
ZW
952/* Parse (append) an identifier. Calculates the hash value of the
953 token while parsing, for performance. The algorithm *must* match
954 cpp_lookup(). */
417f3e3a 955static const U_CHAR *
bfb9dc7f 956parse_name (pfile, tok, cur, rlimit)
45b966db 957 cpp_reader *pfile;
bfb9dc7f
ZW
958 cpp_token *tok;
959 const U_CHAR *cur, *rlimit;
45b966db 960{
711b8824 961 const U_CHAR *name;
bfb9dc7f 962 unsigned int len;
711b8824 963 unsigned int r;
041c3194 964
711b8824
ZW
965 name = cur;
966 r = 0;
bfb9dc7f 967 while (cur < rlimit)
041c3194 968 {
bfb9dc7f
ZW
969 if (! is_idchar (*cur))
970 break;
5ef865d5 971 /* $ is not a identifier character in the standard, but is
e5ec2402
ZW
972 commonly accepted as an extension. Don't warn about it in
973 skipped conditional blocks. */
bfb9dc7f 974 if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
041c3194 975 {
bfb9dc7f 976 CPP_BUFFER (pfile)->cur = cur;
041c3194
ZW
977 cpp_pedwarn (pfile, "'$' character in identifier");
978 }
711b8824
ZW
979
980 r = HASHSTEP (r, cur);
bfb9dc7f 981 cur++;
041c3194 982 }
bfb9dc7f 983 len = cur - name;
45b966db 984
ba89d661 985 if (tok->type == CPP_NAME && tok->val.node == 0)
711b8824
ZW
986 tok->val.node = _cpp_lookup_with_hash (pfile, name, len, r);
987 else
041c3194 988 {
ba89d661
ZW
989 unsigned int oldlen;
990 U_CHAR *newname;
991
992 if (tok->type == CPP_NAME)
993 oldlen = tok->val.node->length;
994 else
995 oldlen = 1;
996
997 newname = alloca (oldlen + len);
998
999 if (tok->type == CPP_NAME)
1000 memcpy (newname, tok->val.node->name, oldlen);
1001 else
1002 newname[0] = tok->val.aux;
bfb9dc7f 1003 memcpy (newname + oldlen, name, len);
711b8824 1004 tok->val.node = cpp_lookup (pfile, newname, len + oldlen);
ba89d661 1005 tok->type = CPP_NAME;
041c3194
ZW
1006 }
1007
bfb9dc7f 1008 return cur;
45b966db
ZW
1009}
1010
041c3194 1011/* Parse (append) a number. */
45b966db 1012static void
041c3194 1013parse_number (pfile, list, name)
45b966db 1014 cpp_reader *pfile;
041c3194 1015 cpp_toklist *list;
bfb9dc7f 1016 cpp_string *name;
45b966db 1017{
041c3194
ZW
1018 const unsigned char *name_limit;
1019 unsigned char *namebuf;
1020 cpp_buffer *buffer = pfile->buffer;
1021 register const unsigned char *cur = buffer->cur;
45b966db 1022
041c3194
ZW
1023 expanded:
1024 name_limit = list->namebuf + list->name_cap;
1025 namebuf = list->namebuf + list->name_used;
45b966db 1026
041c3194
ZW
1027 for (; cur < buffer->rlimit && namebuf < name_limit; )
1028 {
1029 unsigned char c = *namebuf = *cur; /* Copy a single char. */
45b966db 1030
041c3194
ZW
1031 /* Perhaps we should accept '$' here if we accept it for
1032 identifiers. We know namebuf[-1] is safe, because for c to
1033 be a sign we must have pushed at least one character. */
1034 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1035 goto out;
45b966db 1036
041c3194
ZW
1037 namebuf++;
1038 cur++;
45b966db 1039 }
64aaf407 1040
041c3194
ZW
1041 /* Run out of name space? */
1042 if (cur < buffer->rlimit)
1043 {
1044 list->name_used = namebuf - list->namebuf;
1045 auto_expand_name_space (list);
1046 goto expanded;
1047 }
1048
64aaf407 1049 out:
041c3194
ZW
1050 buffer->cur = cur;
1051 name->len = namebuf - name->text;
1052 list->name_used = namebuf - list->namebuf;
45b966db
ZW
1053}
1054
041c3194 1055/* Places a string terminated by an unescaped TERMINATOR into a
bfb9dc7f 1056 cpp_string, which should be expandable and thus at the top of the
041c3194
ZW
1057 list's stack. Handles embedded trigraphs, if necessary, and
1058 escaped newlines.
45b966db 1059
041c3194
ZW
1060 Can be used for character constants (terminator = '\''), string
1061 constants ('"') and angled headers ('>'). Multi-line strings are
1062 allowed, except for within directives. */
45b966db 1063
041c3194
ZW
1064static void
1065parse_string (pfile, list, token, terminator)
45b966db 1066 cpp_reader *pfile;
041c3194
ZW
1067 cpp_toklist *list;
1068 cpp_token *token;
1069 unsigned int terminator;
45b966db 1070{
041c3194 1071 cpp_buffer *buffer = pfile->buffer;
bfb9dc7f 1072 cpp_string *name = &token->val.str;
041c3194
ZW
1073 register const unsigned char *cur = buffer->cur;
1074 const unsigned char *name_limit;
1075 unsigned char *namebuf;
1076 unsigned int null_count = 0;
1077 unsigned int trigraphed = list->name_used;
45b966db 1078
041c3194
ZW
1079 expanded:
1080 name_limit = list->namebuf + list->name_cap;
1081 namebuf = list->namebuf + list->name_used;
f2d5f0cc 1082
041c3194 1083 for (; cur < buffer->rlimit && namebuf < name_limit; )
45b966db 1084 {
041c3194 1085 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
45b966db 1086
041c3194
ZW
1087 if (c == '\0')
1088 null_count++;
91fcd158 1089 else if (c == terminator || is_vspace (c))
45b966db 1090 {
041c3194
ZW
1091 /* Needed for trigraph_replace and multiline string warning. */
1092 buffer->cur = cur;
5eec0563 1093
041c3194
ZW
1094 /* Scan for trigraphs before checking if backslash-escaped. */
1095 if ((CPP_OPTION (pfile, trigraphs)
1096 || CPP_OPTION (pfile, warn_trigraphs))
1097 && namebuf - (list->namebuf + trigraphed) >= 3)
45b966db 1098 {
041c3194
ZW
1099 namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1100 namebuf);
1101 /* The test above guarantees trigraphed will be positive. */
1102 trigraphed = namebuf - list->namebuf - 2;
45b966db 1103 }
45b966db 1104
041c3194 1105 namebuf--; /* Drop the newline / terminator from the name. */
91fcd158 1106 if (is_vspace (c))
45b966db 1107 {
041c3194 1108 /* Drop a backslash newline, and continue. */
a58d32c2
ZW
1109 U_CHAR *old = namebuf;
1110 while (namebuf > list->namebuf && is_hspace (namebuf[-1]))
1111 namebuf--;
a0bff95f 1112 if (namebuf > list->namebuf && namebuf[-1] == '\\')
45b966db 1113 {
041c3194
ZW
1114 handle_newline (cur, buffer->rlimit, c);
1115 namebuf--;
a58d32c2
ZW
1116 if (old[-1] != '\\')
1117 {
1118 buffer->cur = cur;
1119 cpp_warning (pfile,
1120 "backslash and newline separated by space");
1121 }
041c3194 1122 continue;
45b966db 1123 }
a58d32c2
ZW
1124 else
1125 namebuf = old;
45b966db 1126
041c3194 1127 cur--;
45b966db 1128
f9a0e96c
ZW
1129 /* In assembly language, silently terminate strings of
1130 either variety at end of line. This is a kludge
1131 around not knowing where comments are. */
1132 if (CPP_OPTION (pfile, lang_asm))
041c3194 1133 goto out;
64aaf407 1134
f9a0e96c
ZW
1135 /* Character constants and header names may not extend
1136 over multiple lines. In Standard C, neither may
1137 strings. We accept multiline strings as an
041c3194 1138 extension. (Even in directives - otherwise, glibc's
f9a0e96c 1139 longlong.h breaks.) */
041c3194
ZW
1140 if (terminator != '"')
1141 goto unterminated;
1142
1143 cur++; /* Move forwards again. */
45b966db 1144
041c3194
ZW
1145 if (pfile->multiline_string_line == 0)
1146 {
1147 pfile->multiline_string_line = token->line;
1148 pfile->multiline_string_column = token->col;
1149 if (CPP_PEDANTIC (pfile))
1150 cpp_pedwarn (pfile, "multi-line string constant");
1151 }
1152
1153 *namebuf++ = '\n';
1154 handle_newline (cur, buffer->rlimit, c);
45b966db
ZW
1155 }
1156 else
1157 {
041c3194 1158 unsigned char *temp;
45b966db 1159
041c3194
ZW
1160 /* An odd number of consecutive backslashes represents
1161 an escaped terminator. */
1162 temp = namebuf - 1;
1163 while (temp >= name->text && *temp == '\\')
1164 temp--;
45b966db 1165
041c3194
ZW
1166 if ((namebuf - temp) & 1)
1167 goto out;
1168 namebuf++;
1169 }
45b966db 1170 }
ff2b53ef 1171 }
45b966db 1172
041c3194
ZW
1173 /* Run out of name space? */
1174 if (cur < buffer->rlimit)
45b966db 1175 {
041c3194
ZW
1176 list->name_used = namebuf - list->namebuf;
1177 auto_expand_name_space (list);
1178 goto expanded;
1179 }
45b966db 1180
041c3194
ZW
1181 /* We may not have trigraph-replaced the input for this code path,
1182 but as the input is in error by being unterminated we don't
1183 bother. Prevent warnings about no newlines at EOF. */
91fcd158 1184 if (is_vspace (cur[-1]))
041c3194 1185 cur--;
45b966db 1186
041c3194
ZW
1187 unterminated:
1188 cpp_error (pfile, "missing terminating %c character", (int) terminator);
476f2869 1189
041c3194
ZW
1190 if (terminator == '\"' && pfile->multiline_string_line != list->line
1191 && pfile->multiline_string_line != 0)
1192 {
1193 cpp_error_with_line (pfile, pfile->multiline_string_line,
1194 pfile->multiline_string_column,
1195 "possible start of unterminated string literal");
1196 pfile->multiline_string_line = 0;
45b966db 1197 }
041c3194
ZW
1198
1199 out:
1200 buffer->cur = cur;
1201 name->len = namebuf - name->text;
1202 list->name_used = namebuf - list->namebuf;
45b966db 1203
041c3194
ZW
1204 if (null_count > 0)
1205 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1206 : "null character preserved"));
45b966db
ZW
1207}
1208
041c3194 1209/* The character TYPE helps us distinguish comment types: '*' = C
f9a0e96c
ZW
1210 style, '/' = C++ style. For code simplicity, the stored comment
1211 includes the comment start and any terminator. */
041c3194
ZW
1212
1213#define COMMENT_START_LEN 2
9e62c811 1214static void
041c3194
ZW
1215save_comment (list, token, from, len, type)
1216 cpp_toklist *list;
1217 cpp_token *token;
1218 const unsigned char *from;
9e62c811 1219 unsigned int len;
041c3194 1220 unsigned int type;
9e62c811 1221{
041c3194
ZW
1222 unsigned char *buffer;
1223
1224 len += COMMENT_START_LEN;
9e62c811 1225
041c3194
ZW
1226 if (list->name_used + len > list->name_cap)
1227 _cpp_expand_name_space (list, len);
9e62c811 1228
bfb9dc7f 1229 INIT_TOKEN_STR (list, token);
041c3194 1230 token->type = CPP_COMMENT;
bfb9dc7f 1231 token->val.str.len = len;
45b966db 1232
041c3194
ZW
1233 buffer = list->namebuf + list->name_used;
1234 list->name_used += len;
45b966db 1235
041c3194
ZW
1236 /* Copy the comment. */
1237 if (type == '*')
45b966db 1238 {
041c3194
ZW
1239 *buffer++ = '/';
1240 *buffer++ = '*';
45b966db 1241 }
041c3194 1242 else
ea4a453b 1243 {
041c3194
ZW
1244 *buffer++ = type;
1245 *buffer++ = type;
ea4a453b 1246 }
041c3194 1247 memcpy (buffer, from, len - COMMENT_START_LEN);
45b966db
ZW
1248}
1249
041c3194
ZW
1250/*
1251 * The tokenizer's main loop. Returns a token list, representing a
1252 * logical line in the input file. On EOF after some tokens have
1253 * been processed, we return immediately. Then in next call, or if
1254 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1255 * token is placed in the list.
1256 *
1257 * Implementation relies almost entirely on lookback, rather than
1258 * looking forwards. This means that tokenization requires just
1259 * a single pass of the file, even in the presence of trigraphs and
1260 * escaped newlines, providing significant performance benefits.
1261 * Trigraph overhead is negligible if they are disabled, and low
1262 * even when enabled.
1263 */
1264
91fcd158 1265#define KNOWN_DIRECTIVE() (list->directive != 0)
041c3194
ZW
1266#define MIGHT_BE_DIRECTIVE() \
1267(cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
45b966db 1268
041c3194
ZW
1269static void
1270lex_line (pfile, list)
45b966db 1271 cpp_reader *pfile;
041c3194 1272 cpp_toklist *list;
45b966db 1273{
041c3194
ZW
1274 cpp_token *cur_token, *token_limit, *first;
1275 cpp_buffer *buffer = pfile->buffer;
1276 const unsigned char *cur = buffer->cur;
1277 unsigned char flags = 0;
1278 unsigned int first_token = list->tokens_used;
45b966db 1279
041c3194
ZW
1280 if (!(list->flags & LIST_OFFSET))
1281 (abort) ();
9ec7291f
ZW
1282
1283 retry:
041c3194
ZW
1284 list->file = buffer->nominal_fname;
1285 list->line = CPP_BUF_LINE (buffer);
1286 pfile->col_adjust = 0;
1287 pfile->in_lex_line = 1;
1288 if (cur == buffer->buf)
1289 list->flags |= BEG_OF_FILE;
1290
1291 expanded:
1292 token_limit = list->tokens + list->tokens_cap;
1293 cur_token = list->tokens + list->tokens_used;
45b966db 1294
041c3194 1295 for (; cur < buffer->rlimit && cur_token < token_limit;)
45b966db 1296 {
041c3194 1297 unsigned char c;
45b966db 1298
91fcd158
NB
1299 /* Optimize non-vertical whitespace skipping; most tokens are
1300 probably separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
1301 c = *cur;
1302 if (is_nvspace (c))
45b966db 1303 {
91fcd158
NB
1304 buffer->cur = cur;
1305 skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1306 && cur_token > &list->tokens[first_token]));
041c3194 1307 cur = buffer->cur;
ff2b53ef 1308
041c3194
ZW
1309 flags = PREV_WHITE;
1310 if (cur == buffer->rlimit)
1311 break;
91fcd158 1312 c = *cur;
45b966db 1313 }
91fcd158 1314 cur++;
45b966db 1315
041c3194
ZW
1316 /* Initialize current token. CPP_EOF will not be fixed up by
1317 expand_name_space. */
1318 list->tokens_used = cur_token - list->tokens + 1;
1319 cur_token->type = CPP_EOF;
1320 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1321 cur_token->line = CPP_BUF_LINE (buffer);
1322 cur_token->flags = flags;
1323 flags = 0;
46d07497 1324
041c3194
ZW
1325 switch (c)
1326 {
1327 case '0': case '1': case '2': case '3': case '4':
1328 case '5': case '6': case '7': case '8': case '9':
1329 {
1330 int prev_dot;
46d07497 1331
041c3194
ZW
1332 cur--; /* Backup character. */
1333 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1334 if (prev_dot)
1335 cur_token--;
bfb9dc7f 1336 INIT_TOKEN_STR (list, cur_token);
041c3194
ZW
1337 /* Prepend an immediately previous CPP_DOT token. */
1338 if (prev_dot)
1339 {
1340 if (list->name_cap == list->name_used)
1341 auto_expand_name_space (list);
46d07497 1342
bfb9dc7f 1343 cur_token->val.str.len = 1;
041c3194
ZW
1344 list->namebuf[list->name_used++] = '.';
1345 }
46d07497 1346
041c3194
ZW
1347 continue_number:
1348 cur_token->type = CPP_NUMBER; /* Before parse_number. */
1349 buffer->cur = cur;
bfb9dc7f 1350 parse_number (pfile, list, &cur_token->val.str);
041c3194
ZW
1351 cur = buffer->cur;
1352 }
1353 /* Check for # 123 form of #line. */
1354 if (MIGHT_BE_DIRECTIVE ())
1355 list->directive = _cpp_check_linemarker (pfile, cur_token,
1356 !(cur_token[-1].flags
1357 & PREV_WHITE));
1358 cur_token++;
1359 break;
46d07497 1360
041c3194
ZW
1361 letter:
1362 case '_':
1363 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1364 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1365 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1366 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1367 case 'y': case 'z':
1368 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1369 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1370 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1371 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1372 case 'Y': case 'Z':
1373 cur--; /* Backup character. */
ba89d661
ZW
1374
1375 /* In Objective C, '@' may begin certain keywords. */
1376 if (CPP_OPTION (pfile, objc) && cur_token[-1].type == CPP_OTHER
1377 && cur_token[-1].val.aux == '@' && IMMED_TOKEN ())
1378 cur_token--;
1379 else
1380 {
1381 cur_token->val.node = 0;
1382 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
1383 }
041c3194
ZW
1384
1385 continue_name:
bfb9dc7f 1386 cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
45b966db 1387
041c3194
ZW
1388 if (MIGHT_BE_DIRECTIVE ())
1389 list->directive = _cpp_check_directive (pfile, cur_token,
1390 !(list->tokens[0].flags
1391 & PREV_WHITE));
92936ecf
ZW
1392 /* Convert named operators to their proper types. */
1393 if (cur_token->val.node->type == T_OPERATOR)
1394 {
1395 cur_token->flags |= NAMED_OP;
1396 cur_token->type = cur_token->val.node->value.code;
1397 }
1398
041c3194
ZW
1399 cur_token++;
1400 break;
f8f769ea 1401
041c3194 1402 case '\'':
ba89d661
ZW
1403 cur_token->type = CPP_CHAR;
1404 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1405 && cur_token[-1].val.node == pfile->spec_nodes->n_L)
1406 BACKUP_TOKEN (CPP_WCHAR);
1407 goto do_parse_string;
1408
041c3194 1409 case '\"':
ba89d661 1410 cur_token->type = CPP_STRING;
041c3194 1411 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
f9a0e96c 1412 && cur_token[-1].val.node == pfile->spec_nodes->n_L)
ba89d661
ZW
1413 BACKUP_TOKEN (CPP_WSTRING);
1414 else if (CPP_OPTION (pfile, objc)
1415 && cur_token[-1].type == CPP_OTHER && IMMED_TOKEN ()
1416 && cur_token[-1].val.aux == '@')
1417 BACKUP_TOKEN (CPP_OSTRING);
45b966db 1418
041c3194
ZW
1419 do_parse_string:
1420 /* Here c is one of ' " or >. */
bfb9dc7f 1421 INIT_TOKEN_STR (list, cur_token);
041c3194
ZW
1422 buffer->cur = cur;
1423 parse_string (pfile, list, cur_token, c);
1424 cur = buffer->cur;
1425 cur_token++;
1426 break;
45b966db 1427
041c3194
ZW
1428 case '/':
1429 cur_token->type = CPP_DIV;
1430 if (IMMED_TOKEN ())
1431 {
1432 if (PREV_TOKEN_TYPE == CPP_DIV)
1433 {
1434 /* We silently allow C++ comments in system headers,
1435 irrespective of conformance mode, because lots of
1436 broken systems do that and trying to clean it up
1437 in fixincludes is a nightmare. */
1438 if (CPP_IN_SYSTEM_HEADER (pfile))
1439 goto do_line_comment;
1440 else if (CPP_OPTION (pfile, cplusplus_comments))
1441 {
1442 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1443 && ! buffer->warned_cplusplus_comments)
1444 {
1445 buffer->cur = cur;
1446 cpp_pedwarn (pfile,
1447 "C++ style comments are not allowed in ISO C89");
1448 cpp_pedwarn (pfile,
1449 "(this will be reported only once per input file)");
1450 buffer->warned_cplusplus_comments = 1;
1451 }
1452 do_line_comment:
1453 buffer->cur = cur;
1454#if 0 /* Leave until new lexer in place. */
1455 if (cur[-2] != c)
1456 cpp_warning (pfile,
1457 "comment start split across lines");
1458#endif
1459 if (skip_line_comment (pfile))
1460 cpp_warning (pfile, "multi-line comment");
f8f769ea 1461
041c3194
ZW
1462 /* Back-up to first '-' or '/'. */
1463 cur_token--;
1464 if (!CPP_OPTION (pfile, discard_comments)
91fcd158 1465 && (!KNOWN_DIRECTIVE()
041c3194
ZW
1466 || (list->directive->flags & COMMENTS)))
1467 save_comment (list, cur_token++, cur,
1468 buffer->cur - cur, c);
f9a0e96c 1469 else
041c3194
ZW
1470 flags = PREV_WHITE;
1471
1472 cur = buffer->cur;
1473 break;
1474 }
1475 }
1476 }
1477 cur_token++;
1478 break;
1479
1480 case '*':
1481 cur_token->type = CPP_MULT;
1482 if (IMMED_TOKEN ())
45b966db 1483 {
041c3194
ZW
1484 if (PREV_TOKEN_TYPE == CPP_DIV)
1485 {
1486 buffer->cur = cur;
1487#if 0 /* Leave until new lexer in place. */
1488 if (cur[-2] != '/')
1489 cpp_warning (pfile,
1490 "comment start '/*' split across lines");
1491#endif
1492 if (skip_block_comment (pfile))
1493 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1494 "unterminated comment");
1495#if 0 /* Leave until new lexer in place. */
1496 else if (buffer->cur[-2] != '*')
1497 cpp_warning (pfile,
1498 "comment end '*/' split across lines");
1499#endif
1500 /* Back up to opening '/'. */
1501 cur_token--;
1502 if (!CPP_OPTION (pfile, discard_comments)
91fcd158 1503 && (!KNOWN_DIRECTIVE()
041c3194
ZW
1504 || (list->directive->flags & COMMENTS)))
1505 save_comment (list, cur_token++, cur,
1506 buffer->cur - cur, c);
f9a0e96c 1507 else
041c3194
ZW
1508 flags = PREV_WHITE;
1509
1510 cur = buffer->cur;
1511 break;
1512 }
1513 else if (CPP_OPTION (pfile, cplusplus))
1514 {
1515 /* In C++, there are .* and ->* operators. */
1516 if (PREV_TOKEN_TYPE == CPP_DEREF)
1517 BACKUP_TOKEN (CPP_DEREF_STAR);
1518 else if (PREV_TOKEN_TYPE == CPP_DOT)
1519 BACKUP_TOKEN (CPP_DOT_STAR);
1520 }
45b966db 1521 }
041c3194 1522 cur_token++;
f8f769ea 1523 break;
45b966db 1524
041c3194
ZW
1525 case '\n':
1526 case '\r':
1527 handle_newline (cur, buffer->rlimit, c);
1528 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
45b966db 1529 {
a58d32c2
ZW
1530 /* backslash space newline is still treated as backslash-newline;
1531 we think this is standard conforming, with some reservations
1532 about actually _using_ the weasel words in C99 5.1.1.2
1533 (translation phase 1 is allowed to do whatever it wants to
1534 your input as long as it's documented). */
1535 if (! IMMED_TOKEN ())
041c3194
ZW
1536 {
1537 buffer->cur = cur;
1538 cpp_warning (pfile,
1539 "backslash and newline separated by space");
1540 }
a58d32c2
ZW
1541
1542 /* Remove the escaped newline. Then continue to process
1543 any interrupted name or number. */
1544 cur_token--;
1545 /* Backslash-newline may not be immediately followed by
1546 EOF (C99 5.1.1.2). */
1547 if (cur >= buffer->rlimit)
1548 {
1549 cpp_pedwarn (pfile, "backslash-newline at end of file");
1550 break;
1551 }
1552 if (IMMED_TOKEN ())
1553 {
1554 cur_token--;
1555 if (cur_token->type == CPP_NAME)
1556 goto continue_name;
1557 else if (cur_token->type == CPP_NUMBER)
1558 goto continue_number;
1559 cur_token++;
1560 }
1561 /* Remember whitespace setting. */
1562 flags = cur_token->flags;
1563 break;
041c3194
ZW
1564 }
1565 else if (MIGHT_BE_DIRECTIVE ())
1566 {
1567 /* "Null directive." C99 6.10.7: A preprocessing
1568 directive of the form # <new-line> has no effect.
1569
1570 But it is still a directive, and therefore disappears
1571 from the output. */
1572 cur_token--;
f9a0e96c
ZW
1573 if (cur_token->flags & PREV_WHITE
1574 && CPP_WTRADITIONAL (pfile))
1575 cpp_warning (pfile, "K+R C ignores #\\n with the # indented");
f8f769ea 1576 }
45b966db 1577
041c3194
ZW
1578 /* Skip vertical space until we have at least one token to
1579 return. */
1580 if (cur_token != &list->tokens[first_token])
1581 goto out;
1582 list->line = CPP_BUF_LINE (buffer);
f8f769ea 1583 break;
04e3ec78 1584
041c3194
ZW
1585 case '-':
1586 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
f9a0e96c 1587 REVISE_TOKEN (CPP_MINUS_MINUS);
041c3194
ZW
1588 else
1589 PUSH_TOKEN (CPP_MINUS);
1590 break;
45b966db 1591
041c3194
ZW
1592 make_hash:
1593 case '#':
1594 /* The digraph flag checking ensures that ## and %:%:
1595 are interpreted as CPP_PASTE, but #%: and %:# are not. */
1596 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1597 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1598 REVISE_TOKEN (CPP_PASTE);
1599 else
1600 PUSH_TOKEN (CPP_HASH);
1601 break;
04e3ec78 1602
041c3194
ZW
1603 case ':':
1604 cur_token->type = CPP_COLON;
1605 if (IMMED_TOKEN ())
1606 {
1607 if (PREV_TOKEN_TYPE == CPP_COLON
1608 && CPP_OPTION (pfile, cplusplus))
1609 BACKUP_TOKEN (CPP_SCOPE);
9b55f29a 1610 else if (CPP_OPTION (pfile, digraphs))
041c3194 1611 {
9b55f29a
NB
1612 /* Digraph: "<:" is a '[' */
1613 if (PREV_TOKEN_TYPE == CPP_LESS)
1614 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1615 /* Digraph: "%:" is a '#' */
1616 else if (PREV_TOKEN_TYPE == CPP_MOD)
1617 {
1618 (--cur_token)->flags |= DIGRAPH;
1619 goto make_hash;
1620 }
041c3194
ZW
1621 }
1622 }
1623 cur_token++;
1624 break;
f8f769ea 1625
041c3194
ZW
1626 case '&':
1627 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1628 REVISE_TOKEN (CPP_AND_AND);
1629 else
1630 PUSH_TOKEN (CPP_AND);
f8f769ea 1631 break;
45b966db 1632
041c3194
ZW
1633 make_or:
1634 case '|':
1635 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1636 REVISE_TOKEN (CPP_OR_OR);
1637 else
1638 PUSH_TOKEN (CPP_OR);
1639 break;
45b966db 1640
041c3194
ZW
1641 case '+':
1642 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1643 REVISE_TOKEN (CPP_PLUS_PLUS);
1644 else
1645 PUSH_TOKEN (CPP_PLUS);
1646 break;
45b966db 1647
041c3194
ZW
1648 case '=':
1649 /* This relies on equidistance of "?=" and "?" tokens. */
1650 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1651 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1652 else
1653 PUSH_TOKEN (CPP_EQ);
1654 break;
45b966db 1655
041c3194
ZW
1656 case '>':
1657 cur_token->type = CPP_GREATER;
1658 if (IMMED_TOKEN ())
1659 {
1660 if (PREV_TOKEN_TYPE == CPP_GREATER)
1661 BACKUP_TOKEN (CPP_RSHIFT);
1662 else if (PREV_TOKEN_TYPE == CPP_MINUS)
1663 BACKUP_TOKEN (CPP_DEREF);
9b55f29a
NB
1664 else if (CPP_OPTION (pfile, digraphs))
1665 {
1666 /* Digraph: ":>" is a ']' */
1667 if (PREV_TOKEN_TYPE == CPP_COLON)
1668 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1669 /* Digraph: "%>" is a '}' */
1670 else if (PREV_TOKEN_TYPE == CPP_MOD)
1671 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1672 }
041c3194
ZW
1673 }
1674 cur_token++;
1675 break;
1676
1677 case '<':
1678 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1679 {
1680 REVISE_TOKEN (CPP_LSHIFT);
1681 break;
1682 }
1683 /* Is this the beginning of a header name? */
91fcd158 1684 if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
041c3194
ZW
1685 {
1686 c = '>'; /* Terminator. */
1687 cur_token->type = CPP_HEADER_NAME;
1688 goto do_parse_string;
1689 }
1690 PUSH_TOKEN (CPP_LESS);
1691 break;
45b966db 1692
041c3194
ZW
1693 case '%':
1694 /* Digraph: "<%" is a '{' */
1695 cur_token->type = CPP_MOD;
9b55f29a
NB
1696 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1697 && CPP_OPTION (pfile, digraphs))
041c3194
ZW
1698 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1699 cur_token++;
1700 break;
04e3ec78 1701
041c3194
ZW
1702 case '?':
1703 if (cur + 1 < buffer->rlimit && *cur == '?'
61d0346d 1704 && _cpp_trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
041c3194
ZW
1705 {
1706 /* Handle trigraph. */
1707 cur++;
1708 switch (*cur++)
1709 {
1710 case '(': goto make_open_square;
1711 case ')': goto make_close_square;
1712 case '<': goto make_open_brace;
1713 case '>': goto make_close_brace;
1714 case '=': goto make_hash;
1715 case '!': goto make_or;
1716 case '-': goto make_complement;
1717 case '/': goto make_backslash;
1718 case '\'': goto make_xor;
1719 }
1720 }
1721 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1722 {
1723 /* GNU C++ defines <? and >? operators. */
1724 if (PREV_TOKEN_TYPE == CPP_LESS)
1725 {
1726 REVISE_TOKEN (CPP_MIN);
1727 break;
1728 }
1729 else if (PREV_TOKEN_TYPE == CPP_GREATER)
1730 {
1731 REVISE_TOKEN (CPP_MAX);
1732 break;
1733 }
1734 }
1735 PUSH_TOKEN (CPP_QUERY);
1736 break;
45b966db 1737
041c3194
ZW
1738 case '.':
1739 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1740 && IMMED_TOKEN ()
1741 && !(cur_token[-1].flags & PREV_WHITE))
1742 {
1743 cur_token -= 2;
1744 PUSH_TOKEN (CPP_ELLIPSIS);
1745 }
1746 else
1747 PUSH_TOKEN (CPP_DOT);
1748 break;
c5a04734 1749
041c3194
ZW
1750 make_complement:
1751 case '~': PUSH_TOKEN (CPP_COMPL); break;
1752 make_xor:
1753 case '^': PUSH_TOKEN (CPP_XOR); break;
1754 make_open_brace:
1755 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1756 make_close_brace:
1757 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1758 make_open_square:
1759 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1760 make_close_square:
1761 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1762 make_backslash:
1763 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1764 case '!': PUSH_TOKEN (CPP_NOT); break;
1765 case ',': PUSH_TOKEN (CPP_COMMA); break;
1766 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1767 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1768 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
6d2c2047 1769
041c3194
ZW
1770 case '$':
1771 if (CPP_OPTION (pfile, dollars_in_ident))
1772 goto letter;
1773 /* Fall through */
041c3194
ZW
1774 default:
1775 cur_token->val.aux = c;
1776 PUSH_TOKEN (CPP_OTHER);
1777 break;
1778 }
1779 }
6d2c2047 1780
041c3194
ZW
1781 /* Run out of token space? */
1782 if (cur_token == token_limit)
1783 {
1784 list->tokens_used = cur_token - list->tokens;
1785 _cpp_expand_token_space (list, 256);
1786 goto expanded;
1787 }
6d2c2047 1788
041c3194
ZW
1789 cur_token->flags = flags;
1790 if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1791 {
91fcd158 1792 if (cur > buffer->buf && !is_vspace (cur[-1]))
041c3194
ZW
1793 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1794 CPP_BUF_COLUMN (buffer, cur),
1795 "no newline at end of file");
1796 cur_token++->type = CPP_EOF;
1797 }
6d2c2047 1798
041c3194
ZW
1799 out:
1800 /* All tokens are allocated, so the memory location is fixed. */
1801 first = &list->tokens[first_token];
1802
1803 /* Don't complain about the null directive, nor directives in
1804 assembly source: we don't know where the comments are, and # may
1805 introduce assembler pseudo-ops. Don't complain about invalid
1806 directives in skipped conditional groups (6.10 p4). */
1807 if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1808 && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1809 {
1810 if (first[1].type == CPP_NAME)
92936ecf
ZW
1811 cpp_error (pfile, "invalid preprocessing directive #%s",
1812 first[1].val.node->name);
041c3194
ZW
1813 else
1814 cpp_error (pfile, "invalid preprocessing directive");
9ec7291f
ZW
1815
1816 /* Discard this line to prevent further errors from cc1. */
1817 _cpp_clear_toklist (list);
1818 goto retry;
041c3194
ZW
1819 }
1820
91fcd158
NB
1821 /* Put EOF at end of known directives. This covers "directives do
1822 not extend beyond the end of the line (description 6.10 part 2)". */
1823 if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
041c3194
ZW
1824 {
1825 pfile->first_directive_token = first;
1826 cur_token++->type = CPP_EOF;
1827 }
1828
6f4280ef
ZW
1829 first->flags |= BOL;
1830 if (first_token != 0)
041c3194
ZW
1831 /* 6.10.3.10: Within the sequence of preprocessing tokens making
1832 up the invocation of a function-like macro, new line is
1833 considered a normal white-space character. */
1834 first->flags |= PREV_WHITE;
1835
1836 buffer->cur = cur;
1837 list->tokens_used = cur_token - list->tokens;
1838 pfile->in_lex_line = 0;
6d2c2047
ZW
1839}
1840
041c3194 1841/* Write the spelling of a token TOKEN, with any appropriate
58fea6af
ZW
1842 whitespace before it, to FP. PREV is the previous token, which
1843 is used to determine if we need to shove in an extra space in order
1844 to avoid accidental token paste. If WHITE is 0, do not insert any
1845 leading whitespace. */
041c3194 1846static void
58fea6af 1847output_token (pfile, fp, token, prev, white)
041c3194 1848 cpp_reader *pfile;
58fea6af 1849 FILE *fp;
041c3194 1850 const cpp_token *token, *prev;
58fea6af 1851 int white;
041c3194 1852{
58fea6af 1853 if (white)
041c3194 1854 {
58fea6af
ZW
1855 int dummy;
1856
1857 if (token->col && (token->flags & BOL))
1858 {
1859 /* Supply enough whitespace to put this token in its original
1860 column. Don't bother trying to reconstruct tabs; we can't
1861 get it right in general, and nothing ought to care. (Yes,
1862 some things do care; the fault lies with them.) */
1863 unsigned int spaces = token->col - 1;
1864
1865 while (spaces--)
1866 putc (' ', fp);
1867 }
1868 else if (token->flags & PREV_WHITE)
1869 putc (' ', fp);
1870 else
1871 /* Check for and prevent accidental token pasting.
1872 In addition to the cases handled by can_paste, consider
1873
1874 a + ++b - if there is not a space between the + and ++, it
1875 will be misparsed as a++ + b. But + ## ++ doesn't produce
1876 a valid token. */
1877 if (prev
1878 && (can_paste (pfile, prev, token, &dummy) != CPP_EOF
1879 || (prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1880 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS)))
1881 putc (' ', fp);
041c3194 1882 }
58fea6af
ZW
1883
1884 switch (TOKEN_SPELL (token))
041c3194 1885 {
58fea6af
ZW
1886 case SPELL_OPERATOR:
1887 {
1888 const unsigned char *spelling;
1889
1890 if (token->flags & DIGRAPH)
1891 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1892 else if (token->flags & NAMED_OP)
1893 goto spell_ident;
1894 else
1895 spelling = TOKEN_NAME (token);
1896
1897 ufputs (spelling, fp);
1898 }
1899 break;
1900
1901 case SPELL_IDENT:
1902 spell_ident:
1903 ufputs (token->val.node->name, fp);
1904 break;
1905
1906 case SPELL_STRING:
1907 {
ba89d661
ZW
1908 int left, right, tag;
1909 switch (token->type)
1910 {
1911 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1912 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1913 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1914 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1915 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1916 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1917 default: left = '\0'; right = '\0'; tag = '\0'; break;
1918 }
1919 if (tag) putc (tag, fp);
1920 if (left) putc (left, fp);
58fea6af 1921 fwrite (token->val.str.text, 1, token->val.str.len, fp);
ba89d661 1922 if (right) putc (right, fp);
58fea6af
ZW
1923 }
1924 break;
1925
1926 case SPELL_CHAR:
1927 putc (token->val.aux, fp);
1928 break;
1929
1930 case SPELL_NONE:
1931 /* Placemarker or EOF - no output. (Macro args are handled
1932 elsewhere. */
1933 break;
041c3194 1934 }
58fea6af
ZW
1935}
1936
1937/* Dump the original user's spelling of argument index ARG_NO to the
1938 macro whose expansion is LIST. */
1939static void
1940dump_param_spelling (fp, list, arg_no)
1941 FILE *fp;
1942 const cpp_toklist *list;
1943 unsigned int arg_no;
1944{
1945 const U_CHAR *param = list->namebuf;
1946
1947 while (arg_no--)
1948 param += ustrlen (param) + 1;
1949 ufputs (param, fp);
1950}
1951
1952/* Output all the tokens of LIST, starting at TOKEN, to FP. */
1953void
1954cpp_output_list (pfile, fp, list, token)
1955 cpp_reader *pfile;
1956 FILE *fp;
1957 const cpp_toklist *list;
1958 const cpp_token *token;
1959{
1960 const cpp_token *limit = list->tokens + list->tokens_used;
1961 const cpp_token *prev = 0;
1962 int white = 0;
d6d5f795 1963
58fea6af
ZW
1964 while (token < limit)
1965 {
1966 /* XXX Find some way we can write macro args from inside
1967 output_token/spell_token. */
1968 if (token->type == CPP_MACRO_ARG)
1969 {
1970 if (white && token->flags & PREV_WHITE)
1971 putc (' ', fp);
1972 if (token->flags & STRINGIFY_ARG)
1973 putc ('#', fp);
1974 dump_param_spelling (fp, list, token->val.aux);
1975 }
1976 else
1977 output_token (pfile, fp, token, prev, white);
1978 if (token->flags & PASTE_LEFT)
1979 fputs (" ##", fp);
1980 prev = token;
1981 token++;
1982 white = 1;
1983 }
041c3194 1984}
d6d5f795 1985
58fea6af 1986
041c3194 1987/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1988 already contain the enough space to hold the token's spelling.
1989 Returns a pointer to the character after the last character
1990 written. */
d6d5f795 1991
041c3194
ZW
1992static unsigned char *
1993spell_token (pfile, token, buffer)
1994 cpp_reader *pfile; /* Would be nice to be rid of this... */
1995 const cpp_token *token;
1996 unsigned char *buffer;
1997{
96be6998 1998 switch (TOKEN_SPELL (token))
041c3194
ZW
1999 {
2000 case SPELL_OPERATOR:
2001 {
2002 const unsigned char *spelling;
2003 unsigned char c;
d6d5f795 2004
041c3194
ZW
2005 if (token->flags & DIGRAPH)
2006 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
92936ecf
ZW
2007 else if (token->flags & NAMED_OP)
2008 goto spell_ident;
041c3194 2009 else
96be6998 2010 spelling = TOKEN_NAME (token);
041c3194
ZW
2011
2012 while ((c = *spelling++) != '\0')
2013 *buffer++ = c;
2014 }
2015 break;
d6d5f795 2016
041c3194 2017 case SPELL_IDENT:
92936ecf 2018 spell_ident:
bfb9dc7f
ZW
2019 memcpy (buffer, token->val.node->name, token->val.node->length);
2020 buffer += token->val.node->length;
041c3194 2021 break;
d6d5f795 2022
041c3194
ZW
2023 case SPELL_STRING:
2024 {
ba89d661
ZW
2025 int left, right, tag;
2026 switch (token->type)
2027 {
2028 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
2029 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
2030 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
2031 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
2032 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
2033 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
2034 default: left = '\0'; right = '\0'; tag = '\0'; break;
2035 }
2036 if (tag) *buffer++ = tag;
2037 if (left) *buffer++ = left;
bfb9dc7f
ZW
2038 memcpy (buffer, token->val.str.text, token->val.str.len);
2039 buffer += token->val.str.len;
ba89d661 2040 if (right) *buffer++ = right;
041c3194
ZW
2041 }
2042 break;
d6d5f795 2043
041c3194
ZW
2044 case SPELL_CHAR:
2045 *buffer++ = token->val.aux;
2046 break;
d6d5f795 2047
041c3194 2048 case SPELL_NONE:
96be6998 2049 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
2050 break;
2051 }
d6d5f795 2052
041c3194
ZW
2053 return buffer;
2054}
d6d5f795 2055
f67798e7
NB
2056/* Macro expansion algorithm.
2057
2058Macro expansion is implemented by a single-pass algorithm; there are
2059no rescan passes involved. cpp_get_token expands just enough to be
2060able to return a token to the caller; a consequence is that when it
2061returns the preprocessor can be in a state of mid-expansion. The
2062algorithm does not work by fully expanding a macro invocation into
2063some kind of token list, and then returning them one by one.
2064
2065Our expansion state is recorded in a context stack. We start out with
2066a single context on the stack, let's call it base context. This
2067consists of the token list returned by lex_line that forms the next
2068logical line in the source file.
2069
2070The current level in the context stack is stored in the cur_context
2071member of the cpp_reader structure. The context it references keeps,
2072amongst other things, a count of how many tokens form that context and
2073our position within those tokens.
2074
2075Fundamentally, calling cpp_get_token will return the next token from
2076the current context. If we're at the end of the current context, that
2077context is popped from the stack first, unless it is the base context,
2078in which case the next logical line is lexed from the source file.
2079
2080However, before returning the token, if it is a CPP_NAME token
2081_cpp_get_token checks to see if it is a macro and if it is enabled.
2082Each time it encounters a macro name, it calls push_macro_context.
2083This function checks that the macro should be expanded (with
2084is_macro_enabled), and if so pushes a new macro context on the stack
2085which becomes the current context. It then loops back to read the
2086first token of the macro context.
2087
2088A macro context basically consists of the token list representing the
2089macro's replacement list, which was saved in the hash table by
2090save_macro_expansion when its #define statement was parsed. If the
2091macro is function-like, it also contains the tokens that form the
2092arguments to the macro. I say more about macro arguments below, but
2093for now just saying that each argument is a set of pointers to tokens
2094is enough.
2095
2096When taking tokens from a macro context, we may get a CPP_MACRO_ARG
2097token. This represents an argument passed to the macro, with the
2098argument number stored in the token's AUX field. The argument should
2099be substituted; this is achieved by pushing an "argument context". An
2100argument context just refers to the tokens forming the argument,
2101which are obtained directly from the macro context. The STRINGIFY
2102flag on a CPP_MACRO_ARG token indicates that the argument should be
2103stringified.
2104
2105Here are a few simple rules the context stack obeys:-
2106
2107 1) The lex_line token list is always context zero.
2108
2109 2) Context 1, if it exists, must be a macro context.
2110
2111 3) An argument context can only appear above a macro context.
2112
2113 4) A macro context can appear above the base context, another macro
2114 context, or an argument context.
2115
2116 5) These imply that the minimal level of an argument context is 2.
2117
2118The only tricky thing left is ensuring that macros are enabled and
2119disabled correctly. The algorithm controls macro expansion by the
2120level of the context a token is taken from in the context stack. If a
2121token is taken from a level equal to no_expand_level (a member of
2122struct cpp_reader), no expansion is performed.
2123
2124When popping a context off the stack, if no_expand_level equals the
2125level of the popped context, it is reduced by one to match the new
2126context level, so that expansion is still disabled. It does not
2127increase if a context is pushed, though. It starts out life as
2128UINT_MAX, which has the effect that initially macro expansion is
2129enabled. I explain how this mechanism works below.
2130
2131The standard requires:-
2132
2133 1) Arguments to be fully expanded before substitution.
2134
2135 2) Stringified arguments to not be expanded, nor the tokens
2136 immediately surrounding a ## operator.
2137
2138 3) Continual rescanning until there are no more macros left to
2139 replace.
2140
2141 4) Once a macro has been expanded in stage 1) or 3), it cannot be
2142 expanded again during later rescans. This prevents infinite
2143 recursion.
2144
2145The first thing to observe is that stage 3) is mostly redundant.
2146Since a macro is disabled once it has been expanded, how can a rescan
2147find an unexpanded macro name? There are only two cases where this is
2148possible:-
2149
2150 a) If the macro name results from a token paste operation.
2151
2152 b) If the macro in question is a function-like macro that hasn't
2153 already been expanded because previously there was not the required
2154 '(' token immediately following it. This is only possible when an
2155 argument is substituted, and after substitution the last token of
2156 the argument can bind with a parenthesis appearing in the tokens
2157 following the substitution. Note that if the '(' appears within the
2158 argument, the ')' must too, as expanding macro arguments cannot
2159 "suck in" tokens outside the argument.
2160
2161So we tackle this as follows. When parsing the macro invocation for
2162arguments, we record the tokens forming each argument as a list of
2163pointers to those tokens. We do not expand any tokens that are "raw",
2164i.e. directly from the macro invocation, but other tokens that come
2165from (nested) argument substitution are fully expanded.
2166
2167This is achieved by setting the no_expand_level to that of the macro
2168invocation. A CPP_MACRO_ARG token never appears in the list of tokens
2169forming an argument, because parse_args (indirectly) calls
2170get_raw_token which automatically pushes argument contexts and traces
2171into them. Since these contexts are at a higher level than the
2172no_expand_level, they get fully macro expanded.
2173
2174"Raw" and non-raw tokens are separated in arguments by null pointers,
2175with the policy that the initial state of an argument is raw. If the
2176first token is not raw, it should be preceded by a null pointer. When
2177tracing through the tokens of an argument context, each time
2178get_raw_token encounters a null pointer, it toggles the flag
2179CONTEXT_RAW.
2180
2181This flag, when set, indicates to is_macro_disabled that we are
2182reading raw tokens which should be macro-expanded. Similarly, if
2183clear, is_macro_disabled suppresses re-expansion.
2184
2185It's probably time for an example.
2186
2187#define hash #
2188#define str(x) #x
2189#define xstr(y) str(y hash)
2190str(hash) // "hash"
2191xstr(hash) // "# hash"
2192
2193In the invocation of str, parse_args turns off macro expansion and so
2194parses the argument as <hash>. This is the only token (pointer)
2195passed as the argument to str. Since <hash> is raw there is no need
2196for an initial null pointer. stringify_arg is called from
2197get_raw_token when tracing through the expansion of str, since the
2198argument has the STRINGIFY flag set. stringify_arg turns off
2199macro_expansion by setting the no_expand_level to that of the argument
2200context. Thus it gets the token <hash> and stringifies it to "hash"
2201correctly.
2202
2203Similarly xstr is passed <hash>. However, when parse_args is parsing
2204the invocation of str() in xstr's expansion, get_raw_token encounters
2205a CPP_MACRO_ARG token for y. Transparently to parse_args, it pushes
2206an argument context, and enters the tokens of the argument,
2207i.e. <hash>. This is at a higher context level than parse_args
2208disabled, and so is_macro_disabled permits expansion of it and a macro
2209context is pushed on top of the argument context. This contains the
2210<#> token, and the end result is that <hash> is macro expanded.
2211However, after popping off the argument context, the <hash> of xstr's
2212expansion does not get macro expanded because we're back at the
2213no_expand_level. The end result is that the argument passed to str is
2214<NULL> <#> <NULL> <hash>. Note the nulls - policy is we start off
2215raw, <#> is not raw, but then <hash> is.
2216
2217*/
d6d5f795 2218
041c3194
ZW
2219
2220/* Free the storage allocated for macro arguments. */
2221static void
2222free_macro_args (args)
2223 macro_args *args;
c5a04734 2224{
041c3194 2225 if (args->tokens)
417f3e3a 2226 free ((PTR) args->tokens);
041c3194
ZW
2227 free (args->ends);
2228 free (args);
c5a04734
ZW
2229}
2230
041c3194
ZW
2231/* Determines if a macro has been already used (and is therefore
2232 disabled). */
c5a04734 2233static int
041c3194 2234is_macro_disabled (pfile, expansion, token)
c5a04734 2235 cpp_reader *pfile;
041c3194
ZW
2236 const cpp_toklist *expansion;
2237 const cpp_token *token;
c5a04734 2238{
041c3194
ZW
2239 cpp_context *context = CURRENT_CONTEXT (pfile);
2240
2241 /* Arguments on either side of ## are inserted in place without
2242 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
2243 occurs during a later rescan pass. The effect is that we expand
2244 iff we would as part of the macro's expansion list, so we should
2245 drop to the macro's context. */
2246 if (IS_ARG_CONTEXT (context))
c5a04734 2247 {
041c3194
ZW
2248 if (token->flags & PASTED)
2249 context--;
2250 else if (!(context->flags & CONTEXT_RAW))
2251 return 1;
2252 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2253 context--;
c5a04734 2254 }
c5a04734 2255
041c3194
ZW
2256 /* Have we already used this macro? */
2257 while (context->level > 0)
c5a04734 2258 {
041c3194
ZW
2259 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2260 return 1;
2261 /* Raw argument tokens are judged based on the token list they
2262 came from. */
2263 if (context->flags & CONTEXT_RAW)
2264 context = pfile->contexts + context->level;
2265 else
2266 context--;
2267 }
c5a04734 2268
041c3194
ZW
2269 /* Function-like macros may be disabled if the '(' is not in the
2270 current context. We check this without disrupting the context
2271 stack. */
2272 if (expansion->paramc >= 0)
2273 {
2274 const cpp_token *next;
2275 unsigned int prev_nme;
c5a04734 2276
041c3194
ZW
2277 context = CURRENT_CONTEXT (pfile);
2278 /* Drop down any contexts we're at the end of: the '(' may
2279 appear in lower macro expansions, or in the rest of the file. */
2280 while (context->posn == context->count && context > pfile->contexts)
2281 {
2282 context--;
2283 /* If we matched, we are disabled, as we appear in the
2284 expansion of each macro we meet. */
2285 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2286 return 1;
2287 }
c5a04734 2288
041c3194
ZW
2289 prev_nme = pfile->no_expand_level;
2290 pfile->no_expand_level = context - pfile->contexts;
417f3e3a 2291 next = _cpp_get_token (pfile);
041c3194
ZW
2292 restore_macro_expansion (pfile, prev_nme);
2293 if (next->type != CPP_OPEN_PAREN)
2294 {
2295 _cpp_push_token (pfile, next);
b9bf5af8 2296 if (CPP_WTRADITIONAL (pfile))
041c3194 2297 cpp_warning (pfile,
92936ecf
ZW
2298 "function macro %s must be used with arguments in traditional C",
2299 token->val.node->name);
041c3194
ZW
2300 return 1;
2301 }
c5a04734 2302 }
c5a04734 2303
041c3194 2304 return 0;
c5a04734
ZW
2305}
2306
041c3194
ZW
2307/* Add a token to the set of tokens forming the arguments to the macro
2308 being parsed in parse_args. */
2309static void
2310save_token (args, token)
2311 macro_args *args;
2312 const cpp_token *token;
c5a04734 2313{
041c3194
ZW
2314 if (args->used == args->capacity)
2315 {
2316 args->capacity += args->capacity + 100;
2317 args->tokens = (const cpp_token **)
417f3e3a
ZW
2318 xrealloc ((PTR) args->tokens,
2319 args->capacity * sizeof (const cpp_token *));
041c3194
ZW
2320 }
2321 args->tokens[args->used++] = token;
c5a04734
ZW
2322}
2323
041c3194
ZW
2324/* Take and save raw tokens until we finish one argument. Empty
2325 arguments are saved as a single CPP_PLACEMARKER token. */
2326static const cpp_token *
2327parse_arg (pfile, var_args, paren_context, args, pcount)
c5a04734 2328 cpp_reader *pfile;
041c3194
ZW
2329 int var_args;
2330 unsigned int paren_context;
2331 macro_args *args;
2332 unsigned int *pcount;
c5a04734 2333{
041c3194
ZW
2334 const cpp_token *token;
2335 unsigned int paren = 0, count = 0;
2336 int raw, was_raw = 1;
c5a04734 2337
041c3194 2338 for (count = 0;; count++)
c5a04734 2339 {
417f3e3a 2340 token = _cpp_get_token (pfile);
c5a04734 2341
041c3194 2342 switch (token->type)
c5a04734 2343 {
041c3194
ZW
2344 default:
2345 break;
c5a04734 2346
041c3194
ZW
2347 case CPP_OPEN_PAREN:
2348 paren++;
2349 break;
2350
2351 case CPP_CLOSE_PAREN:
2352 if (paren-- != 0)
2353 break;
2354 goto out;
2355
2356 case CPP_COMMA:
2357 /* Commas are not terminators within parantheses or var_args. */
2358 if (paren || var_args)
2359 break;
2360 goto out;
2361
2362 case CPP_EOF: /* Error reported by caller. */
2363 goto out;
c5a04734 2364 }
c5a04734 2365
041c3194
ZW
2366 raw = pfile->cur_context <= paren_context;
2367 if (raw != was_raw)
2368 {
2369 was_raw = raw;
2370 save_token (args, 0);
2371 count++;
c5a04734 2372 }
041c3194 2373 save_token (args, token);
c5a04734 2374 }
c5a04734
ZW
2375
2376 out:
041c3194
ZW
2377 if (count == 0)
2378 {
2379 /* Duplicate the placemarker. Then we can set its flags and
2380 position and safely be using more than one. */
2381 save_token (args, duplicate_token (pfile, &placemarker_token));
2382 count++;
2383 }
2384
2385 *pcount = count;
2386 return token;
c5a04734
ZW
2387}
2388
041c3194
ZW
/* Nonzero if the argument starting at offset O of arglist A is empty:
   either the entry at O is a PLACEMARKER token, or it is a null
   pointer and the entry after it is a PLACEMARKER.  O is evaluated
   more than once.  */

#define empty_argument(A, O) \
  (((A)->tokens[(O)] ? (A)->tokens[(O)] : (A)->tokens[(O) + 1])->type \
   == CPP_PLACEMARKER)
2396
2397/* Parse the arguments making up a macro invocation. Nested arguments
2398 are automatically macro expanded, but immediate macros are not
2399 expanded; this enables e.g. operator # to work correctly. Returns
2400 non-zero on error. */
c5a04734 2401static int
041c3194 2402parse_args (pfile, hp, args)
c5a04734 2403 cpp_reader *pfile;
041c3194
ZW
2404 cpp_hashnode *hp;
2405 macro_args *args;
c5a04734 2406{
041c3194
ZW
2407 const cpp_token *token;
2408 const cpp_toklist *macro;
2409 unsigned int total = 0;
2410 unsigned int paren_context = pfile->cur_context;
2411 int argc = 0;
2412
2413 macro = hp->value.expansion;
2414 do
c5a04734 2415 {
041c3194 2416 unsigned int count;
c5a04734 2417
041c3194
ZW
2418 token = parse_arg (pfile, (argc + 1 == macro->paramc
2419 && (macro->flags & VAR_ARGS)),
2420 paren_context, args, &count);
2421 if (argc < macro->paramc)
c5a04734 2422 {
041c3194
ZW
2423 total += count;
2424 args->ends[argc] = total;
c5a04734 2425 }
041c3194 2426 argc++;
c5a04734 2427 }
041c3194 2428 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
c5a04734 2429
041c3194
ZW
2430 if (token->type == CPP_EOF)
2431 {
92936ecf 2432 cpp_error(pfile, "unterminated argument list for macro \"%s\"", hp->name);
041c3194
ZW
2433 return 1;
2434 }
2435 else if (argc < macro->paramc)
2436 {
2437 /* A rest argument is allowed to not appear in the invocation at all.
2438 e.g. #define debug(format, args...) ...
2439 debug("string");
2440 This is exactly the same as if the rest argument had received no
563dd08a 2441 tokens - debug("string",); This extension is deprecated. */
6fee6033
ZW
2442
2443 if (argc + 1 == macro->paramc && (macro->flags & VAR_ARGS))
041c3194
ZW
2444 {
2445 /* Duplicate the placemarker. Then we can set its flags and
2446 position and safely be using more than one. */
6fee6033
ZW
2447 cpp_token *pm = duplicate_token (pfile, &placemarker_token);
2448 pm->flags = VOID_REST;
2449 save_token (args, pm);
041c3194 2450 args->ends[argc] = total + 1;
6fee6033
ZW
2451
2452 if (CPP_OPTION (pfile, c99) && CPP_PEDANTIC (pfile))
2453 cpp_pedwarn (pfile, "ISO C99 requires rest arguments to be used");
2454
041c3194
ZW
2455 return 0;
2456 }
2457 else
2458 {
92936ecf 2459 cpp_error (pfile, "not enough arguments for macro \"%s\"", hp->name);
041c3194
ZW
2460 return 1;
2461 }
2462 }
2463 /* An empty argument to an empty function-like macro is fine. */
2464 else if (argc > macro->paramc
2465 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2466 {
92936ecf 2467 cpp_error (pfile, "too many arguments for macro \"%s\"", hp->name);
041c3194
ZW
2468 return 1;
2469 }
c5a04734 2470
041c3194
ZW
2471 return 0;
2472}
2473
2474/* Adds backslashes before all backslashes and double quotes appearing
2475 in strings. Non-printable characters are converted to octal. */
2476static U_CHAR *
2477quote_string (dest, src, len)
2478 U_CHAR *dest;
2479 const U_CHAR *src;
2480 unsigned int len;
2481{
2482 while (len--)
c5a04734 2483 {
041c3194 2484 U_CHAR c = *src++;
c5a04734 2485
041c3194 2486 if (c == '\\' || c == '"')
6ab3e7dd 2487 {
041c3194
ZW
2488 *dest++ = '\\';
2489 *dest++ = c;
2490 }
2491 else
2492 {
2493 if (ISPRINT (c))
2494 *dest++ = c;
2495 else
2496 {
2497 sprintf ((char *) dest, "\\%03o", c);
2498 dest += 4;
2499 }
6ab3e7dd 2500 }
c5a04734 2501 }
c5a04734 2502
041c3194 2503 return dest;
c5a04734
ZW
2504}
2505
041c3194
ZW
2506/* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2507 CPP_STRING token containing TEXT in quoted form. */
2508static cpp_token *
2509make_string_token (token, text, len)
2510 cpp_token *token;
2511 const U_CHAR *text;
2512 unsigned int len;
2513{
2514 U_CHAR *buf;
2515
2516 buf = (U_CHAR *) xmalloc (len * 4);
2517 token->type = CPP_STRING;
2518 token->flags = 0;
bfb9dc7f
ZW
2519 token->val.str.text = buf;
2520 token->val.str.len = quote_string (buf, text, len) - buf;
041c3194
ZW
2521 return token;
2522}
2523
2524/* Allocates and converts a temporary token to a CPP_NUMBER token,
2525 evaluating to NUMBER. */
2526static cpp_token *
2527alloc_number_token (pfile, number)
c5a04734 2528 cpp_reader *pfile;
041c3194 2529 int number;
c5a04734 2530{
041c3194
ZW
2531 cpp_token *result;
2532 char *buf;
2533
2534 result = get_temp_token (pfile);
2535 buf = xmalloc (20);
2536 sprintf (buf, "%d", number);
2537
2538 result->type = CPP_NUMBER;
2539 result->flags = 0;
bfb9dc7f
ZW
2540 result->val.str.text = (U_CHAR *) buf;
2541 result->val.str.len = strlen (buf);
041c3194
ZW
2542 return result;
2543}
c5a04734 2544
041c3194
ZW
2545/* Returns a temporary token from the temporary token store of PFILE. */
2546static cpp_token *
2547get_temp_token (pfile)
2548 cpp_reader *pfile;
2549{
2550 if (pfile->temp_used == pfile->temp_alloced)
2551 {
2552 if (pfile->temp_used == pfile->temp_cap)
2553 {
2554 pfile->temp_cap += pfile->temp_cap + 20;
2555 pfile->temp_tokens = (cpp_token **) xrealloc
2556 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2557 }
2558 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2559 (sizeof (cpp_token));
2560 }
c5a04734 2561
041c3194
ZW
2562 return pfile->temp_tokens[pfile->temp_used++];
2563}
2564
2565/* Release (not free) for re-use the temporary tokens of PFILE. */
2566static void
2567release_temp_tokens (pfile)
2568 cpp_reader *pfile;
2569{
2570 while (pfile->temp_used)
c5a04734 2571 {
041c3194 2572 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
c5a04734 2573
96be6998 2574 if (TOKEN_SPELL (token) == SPELL_STRING)
c5a04734 2575 {
bfb9dc7f
ZW
2576 free ((char *) token->val.str.text);
2577 token->val.str.text = 0;
c5a04734
ZW
2578 }
2579 }
041c3194 2580}
c5a04734 2581
041c3194
ZW
2582/* Free all of PFILE's dynamically-allocated temporary tokens. */
2583void
2584_cpp_free_temp_tokens (pfile)
2585 cpp_reader *pfile;
2586{
2587 if (pfile->temp_tokens)
c5a04734 2588 {
041c3194
ZW
2589 /* It is possible, though unlikely (looking for '(' of a funlike
2590 macro into EOF), that we haven't released the tokens yet. */
2591 release_temp_tokens (pfile);
2592 while (pfile->temp_alloced)
2593 free (pfile->temp_tokens[--pfile->temp_alloced]);
2594 free (pfile->temp_tokens);
c5a04734
ZW
2595 }
2596
041c3194
ZW
2597 if (pfile->date)
2598 {
bfb9dc7f 2599 free ((char *) pfile->date->val.str.text);
041c3194 2600 free (pfile->date);
bfb9dc7f 2601 free ((char *) pfile->time->val.str.text);
041c3194
ZW
2602 free (pfile->time);
2603 }
c5a04734
ZW
2604}
2605
041c3194
ZW
2606/* Copy TOKEN into a temporary token from PFILE's store. */
2607static cpp_token *
2608duplicate_token (pfile, token)
2609 cpp_reader *pfile;
2610 const cpp_token *token;
2611{
2612 cpp_token *result = get_temp_token (pfile);
c5a04734 2613
041c3194 2614 *result = *token;
96be6998 2615 if (TOKEN_SPELL (token) == SPELL_STRING)
041c3194 2616 {
bfb9dc7f
ZW
2617 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2618 memcpy (buff, token->val.str.text, token->val.str.len);
2619 result->val.str.text = buff;
041c3194
ZW
2620 }
2621 return result;
2622}
c5a04734 2623
041c3194
ZW
2624/* Determine whether two tokens can be pasted together, and if so,
2625 what the resulting token is. Returns CPP_EOF if the tokens cannot
2626 be pasted, or the appropriate type for the merged token if they
2627 can. */
2628static enum cpp_ttype
2629can_paste (pfile, token1, token2, digraph)
2630 cpp_reader * pfile;
2631 const cpp_token *token1, *token2;
2632 int* digraph;
c5a04734 2633{
041c3194
ZW
2634 enum cpp_ttype a = token1->type, b = token2->type;
2635 int cxx = CPP_OPTION (pfile, cplusplus);
c5a04734 2636
92936ecf
ZW
2637 /* Treat named operators as if they were ordinary NAMEs. */
2638 if (token1->flags & NAMED_OP)
2639 a = CPP_NAME;
2640 if (token2->flags & NAMED_OP)
2641 b = CPP_NAME;
2642
041c3194
ZW
2643 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2644 return a + (CPP_EQ_EQ - CPP_EQ);
c5a04734 2645
041c3194 2646 switch (a)
c5a04734 2647 {
041c3194
ZW
2648 case CPP_GREATER:
2649 if (b == a) return CPP_RSHIFT;
2650 if (b == CPP_QUERY && cxx) return CPP_MAX;
2651 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
2652 break;
2653 case CPP_LESS:
2654 if (b == a) return CPP_LSHIFT;
2655 if (b == CPP_QUERY && cxx) return CPP_MIN;
2656 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
9b55f29a
NB
2657 if (CPP_OPTION (pfile, digraphs))
2658 {
2659 if (b == CPP_COLON)
2660 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2661 if (b == CPP_MOD)
2662 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
2663 }
041c3194 2664 break;
c5a04734 2665
041c3194
ZW
2666 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2667 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2668 case CPP_OR: if (b == a) return CPP_OR_OR; break;
c5a04734 2669
041c3194
ZW
2670 case CPP_MINUS:
2671 if (b == a) return CPP_MINUS_MINUS;
2672 if (b == CPP_GREATER) return CPP_DEREF;
2673 break;
2674 case CPP_COLON:
2675 if (b == a && cxx) return CPP_SCOPE;
9b55f29a 2676 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
041c3194
ZW
2677 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2678 break;
2679
2680 case CPP_MOD:
9b55f29a
NB
2681 if (CPP_OPTION (pfile, digraphs))
2682 {
2683 if (b == CPP_GREATER)
2684 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2685 if (b == CPP_COLON)
2686 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2687 }
041c3194
ZW
2688 break;
2689 case CPP_DEREF:
2690 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2691 break;
2692 case CPP_DOT:
2693 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2694 if (b == CPP_NUMBER) return CPP_NUMBER;
2695 break;
2696
2697 case CPP_HASH:
2698 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2699 /* %:%: digraph */
2700 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2701 break;
2702
2703 case CPP_NAME:
2704 if (b == CPP_NAME) return CPP_NAME;
2705 if (b == CPP_NUMBER
bfb9dc7f 2706 && is_numstart(token2->val.str.text[0])) return CPP_NAME;
041c3194 2707 if (b == CPP_CHAR
bfb9dc7f 2708 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
041c3194 2709 if (b == CPP_STRING
bfb9dc7f 2710 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
041c3194
ZW
2711 break;
2712
2713 case CPP_NUMBER:
2714 if (b == CPP_NUMBER) return CPP_NUMBER;
2715 if (b == CPP_NAME) return CPP_NUMBER;
2716 if (b == CPP_DOT) return CPP_NUMBER;
2717 /* Numbers cannot have length zero, so this is safe. */
2718 if ((b == CPP_PLUS || b == CPP_MINUS)
bfb9dc7f 2719 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
041c3194
ZW
2720 return CPP_NUMBER;
2721 break;
2722
ba89d661
ZW
2723 case CPP_OTHER:
2724 if (CPP_OPTION (pfile, objc) && token1->val.aux == '@')
2725 {
2726 if (b == CPP_NAME) return CPP_NAME;
2727 if (b == CPP_STRING) return CPP_OSTRING;
2728 }
2729
041c3194
ZW
2730 default:
2731 break;
c5a04734
ZW
2732 }
2733
041c3194
ZW
2734 return CPP_EOF;
2735}
2736
2737/* Check if TOKEN is to be ##-pasted with the token after it. */
2738static const cpp_token *
2739maybe_paste_with_next (pfile, token)
2740 cpp_reader *pfile;
2741 const cpp_token *token;
2742{
2743 cpp_token *pasted;
2744 const cpp_token *second;
2745 cpp_context *context = CURRENT_CONTEXT (pfile);
2746
2747 /* Is this token on the LHS of ## ? */
041c3194 2748
417f3e3a
ZW
2749 while ((token->flags & PASTE_LEFT)
2750 || ((context->flags & CONTEXT_PASTEL)
2751 && context->posn == context->count))
c5a04734 2752 {
417f3e3a
ZW
2753 /* Suppress macro expansion for next token, but don't conflict
2754 with the other method of suppression. If it is an argument,
2755 macro expansion within the argument will still occur. */
2756 pfile->paste_level = pfile->cur_context;
2757 second = _cpp_get_token (pfile);
2758 pfile->paste_level = 0;
2759
2760 /* Ignore placemarker argument tokens (cannot be from an empty
2761 macro since macros are not expanded). */
2762 if (token->type == CPP_PLACEMARKER)
2763 pasted = duplicate_token (pfile, second);
2764 else if (second->type == CPP_PLACEMARKER)
041c3194 2765 {
5ef865d5
ZW
2766 /* GCC has special extended semantics for , ## b where b is
2767 a varargs parameter: the comma disappears if b was given
2768 no actual arguments (not merely if b is an empty
2769 argument). */
2770 if (token->type == CPP_COMMA && second->flags & VOID_REST)
6fee6033 2771 pasted = duplicate_token (pfile, second);
417f3e3a
ZW
2772 else
2773 pasted = duplicate_token (pfile, token);
041c3194
ZW
2774 }
2775 else
041c3194 2776 {
417f3e3a
ZW
2777 int digraph = 0;
2778 enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
c5a04734 2779
417f3e3a
ZW
2780 if (type == CPP_EOF)
2781 {
2782 if (CPP_OPTION (pfile, warn_paste))
5ef865d5
ZW
2783 {
2784 /* Do not complain about , ## <whatever> if
2785 <whatever> came from a variable argument, because
2786 the author probably intended the ## to trigger
2787 the special extended semantics (see above). */
2788 if (token->type == CPP_COMMA
2789 && IS_ARG_CONTEXT (CURRENT_CONTEXT (pfile))
ff94c747 2790 && ON_REST_ARG (CURRENT_CONTEXT (pfile) - 1))
5ef865d5
ZW
2791 /* no warning */;
2792 else
2793 cpp_warning (pfile,
417f3e3a 2794 "pasting would not give a valid preprocessing token");
5ef865d5 2795 }
417f3e3a 2796 _cpp_push_token (pfile, second);
ff94c747
NB
2797 /* A short term hack to safely clear the PASTE_LEFT flag. */
2798 pasted = duplicate_token (pfile, token);
2799 pasted->flags &= ~PASTE_LEFT;
2800 return pasted;
417f3e3a 2801 }
c5a04734 2802
417f3e3a
ZW
2803 if (type == CPP_NAME || type == CPP_NUMBER)
2804 {
2805 /* Join spellings. */
2806 U_CHAR *buf, *end;
2807
2808 pasted = get_temp_token (pfile);
2809 buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2810 end = spell_token (pfile, token, buf);
2811 end = spell_token (pfile, second, end);
2812 *end = '\0';
041c3194 2813
417f3e3a
ZW
2814 if (type == CPP_NAME)
2815 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2816 else
2817 {
2818 pasted->val.str.text = uxstrdup (buf);
2819 pasted->val.str.len = end - buf;
2820 }
2821 }
ba89d661
ZW
2822 else if (type == CPP_WCHAR || type == CPP_WSTRING
2823 || type == CPP_OSTRING)
417f3e3a 2824 pasted = duplicate_token (pfile, second);
bfb9dc7f
ZW
2825 else
2826 {
417f3e3a
ZW
2827 pasted = get_temp_token (pfile);
2828 pasted->val.integer = 0;
bfb9dc7f 2829 }
417f3e3a
ZW
2830
2831 pasted->type = type;
2832 pasted->flags = digraph ? DIGRAPH : 0;
92936ecf
ZW
2833
2834 if (type == CPP_NAME && pasted->val.node->type == T_OPERATOR)
2835 {
2836 pasted->type = pasted->val.node->value.code;
2837 pasted->flags |= NAMED_OP;
2838 }
041c3194
ZW
2839 }
2840
417f3e3a
ZW
2841 /* The pasted token gets the whitespace flags and position of the
2842 first token, the PASTE_LEFT flag of the second token, plus the
2843 PASTED flag to indicate it is the result of a paste. However, we
2844 want to preserve the DIGRAPH flag. */
2845 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2846 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2847 | (second->flags & PASTE_LEFT) | PASTED);
2848 pasted->col = token->col;
2849 pasted->line = token->line;
2850
2851 /* See if there is another token to be pasted onto the one we just
2852 constructed. */
2853 token = pasted;
2854 context = CURRENT_CONTEXT (pfile);
2855 /* and loop */
041c3194 2856 }
417f3e3a 2857 return token;
041c3194
ZW
2858}
2859
2860/* Convert a token sequence to a single string token according to the
2861 rules of the ISO C #-operator. */
2862#define INIT_SIZE 200
2863static cpp_token *
2864stringify_arg (pfile, token)
c5a04734 2865 cpp_reader *pfile;
041c3194 2866 const cpp_token *token;
c5a04734 2867{
041c3194
ZW
2868 cpp_token *result;
2869 unsigned char *main_buf;
2870 unsigned int prev_value, backslash_count = 0;
2871 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
c5a04734 2872
417f3e3a 2873 push_arg_context (pfile, token);
041c3194
ZW
2874 prev_value = prevent_macro_expansion (pfile);
2875 main_buf = (unsigned char *) xmalloc (buf_cap);
c5a04734 2876
041c3194
ZW
2877 result = get_temp_token (pfile);
2878 ASSIGN_FLAGS_AND_POS (result, token);
2879
417f3e3a 2880 for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
c5a04734 2881 {
041c3194
ZW
2882 int escape;
2883 unsigned char *buf;
2884 unsigned int len = TOKEN_LEN (token);
c5a04734 2885
96be6998
ZW
2886 if (token->type == CPP_PLACEMARKER)
2887 continue;
2888
041c3194
ZW
2889 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2890 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2891 if (escape)
2892 len *= 4 + 1;
2893
2894 if (buf_used + len > buf_cap)
c5a04734 2895 {
041c3194
ZW
2896 buf_cap = buf_used + len + INIT_SIZE;
2897 main_buf = xrealloc (main_buf, buf_cap);
2898 }
c5a04734 2899
041c3194
ZW
2900 if (whitespace && (token->flags & PREV_WHITE))
2901 main_buf[buf_used++] = ' ';
c5a04734 2902
041c3194
ZW
2903 if (escape)
2904 buf = (unsigned char *) xmalloc (len);
2905 else
2906 buf = main_buf + buf_used;
2907
2908 len = spell_token (pfile, token, buf) - buf;
2909 if (escape)
2910 {
2911 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2912 free (buf);
2913 }
2914 else
2915 buf_used += len;
c5a04734 2916
041c3194
ZW
2917 whitespace = 1;
2918 if (token->type == CPP_BACKSLASH)
2919 backslash_count++;
2920 else
2921 backslash_count = 0;
2922 }
c5a04734 2923
041c3194
ZW
2924 /* Ignore the final \ of invalid string literals. */
2925 if (backslash_count & 1)
2926 {
2927 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2928 buf_used--;
2929 }
c5a04734 2930
041c3194 2931 result->type = CPP_STRING;
bfb9dc7f
ZW
2932 result->val.str.text = main_buf;
2933 result->val.str.len = buf_used;
041c3194
ZW
2934 restore_macro_expansion (pfile, prev_value);
2935 return result;
2936}
c5a04734 2937
041c3194
ZW
2938/* Allocate more room on the context stack of PFILE. */
2939static void
2940expand_context_stack (pfile)
2941 cpp_reader *pfile;
2942{
2943 pfile->context_cap += pfile->context_cap + 20;
2944 pfile->contexts = (cpp_context *)
2945 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2946}
c5a04734 2947
041c3194
ZW
2948/* Push the context of macro NODE onto the context stack. TOKEN is
2949 the CPP_NAME token invoking the macro. */
417f3e3a
ZW
2950static int
2951push_macro_context (pfile, token)
041c3194 2952 cpp_reader *pfile;
041c3194
ZW
2953 const cpp_token *token;
2954{
2955 unsigned char orig_flags;
2956 macro_args *args;
2957 cpp_context *context;
417f3e3a 2958 cpp_hashnode *node = token->val.node;
c5a04734 2959
041c3194
ZW
2960 /* Token's flags may change when parsing args containing a nested
2961 invocation of this macro. */
2962 orig_flags = token->flags & (PREV_WHITE | BOL);
2963 args = 0;
2964 if (node->value.expansion->paramc >= 0)
c5a04734 2965 {
041c3194
ZW
2966 unsigned int error, prev_nme;
2967
2968 /* Allocate room for the argument contexts, and parse them. */
2969 args = (macro_args *) xmalloc (sizeof (macro_args));
2970 args->ends = (unsigned int *)
2971 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2972 args->tokens = 0;
2973 args->capacity = 0;
2974 args->used = 0;
041c3194
ZW
2975
2976 prev_nme = prevent_macro_expansion (pfile);
2977 pfile->args = args;
2978 error = parse_args (pfile, node, args);
2979 pfile->args = 0;
2980 restore_macro_expansion (pfile, prev_nme);
2981 if (error)
2982 {
2983 free_macro_args (args);
417f3e3a 2984 return 1;
041c3194 2985 }
08ebc1c5
NB
2986 /* Set the level after the call to parse_args. */
2987 args->level = pfile->cur_context;
c5a04734 2988 }
c5a04734 2989
041c3194
ZW
2990 /* Now push its context. */
2991 pfile->cur_context++;
2992 if (pfile->cur_context == pfile->context_cap)
2993 expand_context_stack (pfile);
2994
2995 context = CURRENT_CONTEXT (pfile);
2996 context->u.list = node->value.expansion;
2997 context->args = args;
2998 context->posn = 0;
2999 context->count = context->u.list->tokens_used;
3000 context->level = pfile->cur_context;
3001 context->flags = 0;
3002 context->pushed_token = 0;
3003
3004 /* Set the flags of the first token. We know there must
3005 be one, empty macros are a single placemarker token. */
3006 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
3007
417f3e3a 3008 return 0;
c5a04734
ZW
3009}
3010
041c3194
ZW
3011/* Push an argument to the current macro onto the context stack.
3012 TOKEN is the MACRO_ARG token representing the argument expansion. */
417f3e3a 3013static void
041c3194
ZW
3014push_arg_context (pfile, token)
3015 cpp_reader *pfile;
3016 const cpp_token *token;
c5a04734 3017{
041c3194
ZW
3018 cpp_context *context;
3019 macro_args *args;
3020
3021 pfile->cur_context++;
3022 if (pfile->cur_context == pfile->context_cap)
3023 expand_context_stack (pfile);
3024
3025 context = CURRENT_CONTEXT (pfile);
3026 args = context[-1].args;
3027
3028 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
3029 context->u.arg = args->tokens + context->count;
3030 context->count = args->ends[token->val.aux] - context->count;
3031 context->args = 0;
3032 context->posn = 0;
3033 context->level = args->level;
3034 context->flags = CONTEXT_ARG | CONTEXT_RAW;
3035 context->pushed_token = 0;
3036
3037 /* Set the flags of the first token. There is one. */
3038 {
3039 const cpp_token *first = context->u.arg[0];
3040 if (!first)
3041 first = context->u.arg[1];
c5a04734 3042
041c3194
ZW
3043 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
3044 token->flags & (PREV_WHITE | BOL));
3045 }
c5a04734 3046
041c3194
ZW
3047 if (token->flags & PASTE_LEFT)
3048 context->flags |= CONTEXT_PASTEL;
3049 if (pfile->paste_level)
3050 context->flags |= CONTEXT_PASTER;
c5a04734
ZW
3051}
3052
041c3194
ZW
3053/* "Unget" a token. It is effectively inserted in the token queue and
3054 will be returned by the next call to get_raw_token. */
c5a04734 3055void
041c3194 3056_cpp_push_token (pfile, token)
c5a04734 3057 cpp_reader *pfile;
041c3194 3058 const cpp_token *token;
c5a04734 3059{
041c3194 3060 cpp_context *context = CURRENT_CONTEXT (pfile);
96be6998
ZW
3061
3062 if (context->posn > 0)
3063 {
3064 const cpp_token *prev;
3065 if (IS_ARG_CONTEXT (context))
3066 prev = context->u.arg[context->posn - 1];
3067 else
3068 prev = &context->u.list->tokens[context->posn - 1];
3069
3070 if (prev == token)
3071 {
3072 context->posn--;
3073 return;
3074 }
3075 }
3076
041c3194
ZW
3077 if (context->pushed_token)
3078 cpp_ice (pfile, "two tokens pushed in a row");
3079 if (token->type != CPP_EOF)
3080 context->pushed_token = token;
3081 /* Don't push back a directive's CPP_EOF, step back instead. */
3082 else if (pfile->cur_context == 0)
3083 pfile->contexts[0].posn--;
3084}
c5a04734 3085
041c3194
ZW
3086/* Handle a preprocessing directive. TOKEN is the CPP_HASH token
3087 introducing the directive. */
3088static void
3089process_directive (pfile, token)
3090 cpp_reader *pfile;
3091 const cpp_token *token;
3092{
3093 const struct directive *d = pfile->token_list.directive;
3094 int prev_nme = 0;
3095
3096 /* Skip over the directive name. */
3097 if (token[1].type == CPP_NAME)
3098 _cpp_get_raw_token (pfile);
3099 else if (token[1].type != CPP_NUMBER)
96be6998 3100 cpp_ice (pfile, "directive begins with %s?!", TOKEN_NAME (token));
041c3194 3101
041c3194
ZW
3102 if (! (d->flags & EXPAND))
3103 prev_nme = prevent_macro_expansion (pfile);
3104 (void) (*d->handler) (pfile);
3105 if (! (d->flags & EXPAND))
3106 restore_macro_expansion (pfile, prev_nme);
3107 _cpp_skip_rest_of_line (pfile);
3108}
c5a04734 3109
041c3194
ZW
3110/* The external interface to return the next token. All macro
3111 expansion and directive processing is handled internally, the
3112 caller only ever sees the output after preprocessing. */
3113const cpp_token *
3114cpp_get_token (pfile)
3115 cpp_reader *pfile;
3116{
3117 const cpp_token *token;
417f3e3a 3118 /* Loop till we hit a non-directive, non-placemarker token. */
041c3194
ZW
3119 for (;;)
3120 {
417f3e3a
ZW
3121 token = _cpp_get_token (pfile);
3122
3123 if (token->type == CPP_PLACEMARKER)
3124 continue;
3125
3126 if (token->type == CPP_HASH && token->flags & BOL
041c3194 3127 && pfile->token_list.directive)
c5a04734 3128 {
041c3194
ZW
3129 process_directive (pfile, token);
3130 continue;
c5a04734
ZW
3131 }
3132
417f3e3a
ZW
3133 return token;
3134 }
3135}
3136
3137/* The internal interface to return the next token. There are two
3138 differences between the internal and external interfaces: the
3139 internal interface may return a PLACEMARKER token, and it does not
3140 process directives. */
3141const cpp_token *
3142_cpp_get_token (pfile)
3143 cpp_reader *pfile;
3144{
6ead1e99 3145 const cpp_token *token, *old_token;
417f3e3a
ZW
3146 cpp_hashnode *node;
3147
3148 /* Loop until we hit a non-macro token. */
3149 for (;;)
3150 {
3151 token = get_raw_token (pfile);
3152
041c3194
ZW
3153 /* Short circuit EOF. */
3154 if (token->type == CPP_EOF)
3155 return token;
417f3e3a
ZW
3156
3157 /* If we are skipping... */
3158 if (pfile->skipping)
c5a04734 3159 {
417f3e3a
ZW
3160 /* we still have to process directives, */
3161 if (pfile->token_list.directive)
3162 return token;
3163
3164 /* but everything else is ignored. */
041c3194
ZW
3165 _cpp_skip_rest_of_line (pfile);
3166 continue;
3167 }
c5a04734 3168
417f3e3a
ZW
3169 /* If there's a potential control macro and we get here, then that
3170 #ifndef didn't cover the entire file and its argument shouldn't
3171 be taken as a control macro. */
3172 pfile->potential_control_macro = 0;
c5a04734 3173
bbdac7d0
ZW
3174 /* If we are rescanning preprocessed input, no macro expansion or
3175 token pasting may occur. */
3176 if (CPP_OPTION (pfile, preprocessed))
3177 return token;
3178
6ead1e99
ZW
3179 old_token = token;
3180
417f3e3a
ZW
3181 /* See if there's a token to paste with this one. */
3182 if (!pfile->paste_level)
3183 token = maybe_paste_with_next (pfile, token);
c5a04734 3184
417f3e3a 3185 /* If it isn't a macro, return it now. */
92936ecf 3186 if (token->type != CPP_NAME || token->val.node->type == T_VOID)
417f3e3a 3187 return token;
c5a04734 3188
92936ecf 3189 /* Is macro expansion disabled in general, or are we in the
6ead1e99
ZW
3190 middle of a token paste, or was this token just pasted?
3191 (Note we don't check token->flags & PASTED, because that
3192 counts tokens that were pasted at some point in the past,
3193 we're only interested in tokens that were pasted by this call
3194 to maybe_paste_with_next.) */
3195 if (pfile->no_expand_level == pfile->cur_context
3196 || pfile->paste_level
3197 || (token != old_token
3198 && pfile->no_expand_level + 1 == pfile->cur_context))
417f3e3a 3199 return token;
6ead1e99 3200
417f3e3a
ZW
3201 node = token->val.node;
3202 if (node->type != T_MACRO)
3203 return special_symbol (pfile, node, token);
c5a04734 3204
041c3194
ZW
3205 if (is_macro_disabled (pfile, node->value.expansion, token))
3206 return token;
c5a04734 3207
417f3e3a
ZW
3208 if (push_macro_context (pfile, token))
3209 return token;
3210 /* else loop */
041c3194 3211 }
041c3194 3212}
c5a04734 3213
041c3194
ZW
3214/* Returns the next raw token, i.e. without performing macro
3215 expansion. Argument contexts are automatically entered. */
3216static const cpp_token *
3217get_raw_token (pfile)
3218 cpp_reader *pfile;
3219{
3220 const cpp_token *result;
417f3e3a 3221 cpp_context *context;
c5a04734 3222
417f3e3a 3223 for (;;)
041c3194 3224 {
417f3e3a
ZW
3225 context = CURRENT_CONTEXT (pfile);
3226 if (context->pushed_token)
041c3194 3227 {
417f3e3a
ZW
3228 result = context->pushed_token;
3229 context->pushed_token = 0;
6fee6033 3230 return result; /* Cannot be a CPP_MACRO_ARG */
417f3e3a
ZW
3231 }
3232 else if (context->posn == context->count)
3233 {
3234 if (pop_context (pfile))
3235 return &eof_token;
3236 continue;
3237 }
6fee6033 3238 else if (IS_ARG_CONTEXT (context))
417f3e3a 3239 {
6fee6033
ZW
3240 result = context->u.arg[context->posn++];
3241 if (result == 0)
c5a04734 3242 {
6fee6033 3243 context->flags ^= CONTEXT_RAW;
041c3194 3244 result = context->u.arg[context->posn++];
c5a04734 3245 }
6fee6033 3246 return result; /* Cannot be a CPP_MACRO_ARG */
041c3194 3247 }
c5a04734 3248
6fee6033
ZW
3249 result = &context->u.list->tokens[context->posn++];
3250
417f3e3a
ZW
3251 if (result->type != CPP_MACRO_ARG)
3252 return result;
3253
3254 if (result->flags & STRINGIFY_ARG)
3255 return stringify_arg (pfile, result);
3256
3257 push_arg_context (pfile, result);
3258 }
041c3194 3259}
c5a04734 3260
041c3194
ZW
3261/* Internal interface to get the token without macro expanding. */
3262const cpp_token *
3263_cpp_get_raw_token (pfile)
3264 cpp_reader *pfile;
3265{
3266 int prev_nme = prevent_macro_expansion (pfile);
417f3e3a 3267 const cpp_token *result = _cpp_get_token (pfile);
041c3194
ZW
3268 restore_macro_expansion (pfile, prev_nme);
3269 return result;
3270}
c5a04734 3271
041c3194
ZW
3272/* A thin wrapper to lex_line. CLEAR is non-zero if the current token
3273 list should be overwritten, or zero if we need to append
3274 (typically, if we are within the arguments to a macro, or looking
3275 for the '(' to start a function-like macro invocation). */
3276static int
3277lex_next (pfile, clear)
3278 cpp_reader *pfile;
3279 int clear;
3280{
3281 cpp_toklist *list = &pfile->token_list;
3282 const cpp_token *old_list = list->tokens;
3283 unsigned int old_used = list->tokens_used;
c5a04734 3284
041c3194 3285 if (clear)
c5a04734 3286 {
041c3194
ZW
3287 /* Release all temporary tokens. */
3288 _cpp_clear_toklist (list);
3289 pfile->contexts[0].posn = 0;
3290 if (pfile->temp_used)
3291 release_temp_tokens (pfile);
c5a04734 3292 }
041c3194
ZW
3293 lex_line (pfile, list);
3294 pfile->contexts[0].count = list->tokens_used;
c5a04734 3295
041c3194 3296 if (!clear && pfile->args)
c5a04734 3297 {
041c3194
ZW
3298 /* Fix up argument token pointers. */
3299 if (old_list != list->tokens)
3300 {
3301 unsigned int i;
3302
3303 for (i = 0; i < pfile->args->used; i++)
3304 {
3305 const cpp_token *token = pfile->args->tokens[i];
3306 if (token >= old_list && token < old_list + old_used)
3307 pfile->args->tokens[i] = (const cpp_token *)
3308 ((char *) token + ((char *) list->tokens - (char *) old_list));
3309 }
3310 }
3311
3312 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3313 tokens within the list of arguments that would otherwise act as
3314 preprocessing directives, the behavior is undefined.
3315
3316 This implementation will report a hard error and treat the
3317 'sequence of preprocessing tokens' as part of the macro argument,
3318 not a directive.
3319
3320 Note if pfile->args == 0, we're OK since we're only inside a
3321 macro argument after a '('. */
3322 if (list->directive)
3323 {
3324 cpp_error_with_line (pfile, list->tokens[old_used].line,
3325 list->tokens[old_used].col,
3326 "#%s may not be used inside a macro argument",
3327 list->directive->name);
91fcd158 3328 return 1;
041c3194 3329 }
c5a04734
ZW
3330 }
3331
041c3194 3332 return 0;
c5a04734
ZW
3333}
3334
417f3e3a
ZW
3335/* Pops a context off the context stack. If we're at the bottom, lexes
3336 the next logical line. Returns EOF if we're at the end of the
5ef865d5 3337 argument list to the # operator, or we should not "overflow"
041c3194
ZW
3338 into the rest of the file (e.g. 6.10.3.1.1). */
3339static int
417f3e3a 3340pop_context (pfile)
041c3194
ZW
3341 cpp_reader *pfile;
3342{
3343 cpp_context *context;
3fef5b2b 3344
041c3194 3345 if (pfile->cur_context == 0)
417f3e3a
ZW
3346 {
3347 /* If we are currently processing a directive, do not advance. 6.10
3348 paragraph 2: A new-line character ends the directive even if it
3349 occurs within what would otherwise be an invocation of a
3350 function-like macro. */
3351 if (pfile->token_list.directive)
3352 return 1;
3353
3354 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3355 }
041c3194
ZW
3356
3357 /* Argument contexts, when parsing args or handling # operator
3358 return CPP_EOF at the end. */
3359 context = CURRENT_CONTEXT (pfile);
3360 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3361 return 1;
3362
3363 /* Free resources when leaving macro contexts. */
3364 if (context->args)
3365 free_macro_args (context->args);
3366
3367 if (pfile->cur_context == pfile->no_expand_level)
3368 pfile->no_expand_level--;
3369 pfile->cur_context--;
3370
3371 return 0;
3372}
3373
041c3194
ZW
3374/* Turn off macro expansion at the current context level. */
3375static unsigned int
3376prevent_macro_expansion (pfile)
3377 cpp_reader *pfile;
3378{
3379 unsigned int prev_value = pfile->no_expand_level;
3380 pfile->no_expand_level = pfile->cur_context;
3381 return prev_value;
3382}
3383
3384/* Restore macro expansion to its previous state. */
3385static void
3386restore_macro_expansion (pfile, prev_value)
3387 cpp_reader *pfile;
3388 unsigned int prev_value;
3389{
3390 pfile->no_expand_level = prev_value;
3391}
3392
3393/* Used by cpperror.c to obtain the correct line and column to report
3394 in a diagnostic. */
3395unsigned int
3396_cpp_get_line (pfile, pcol)
3397 cpp_reader *pfile;
3398 unsigned int *pcol;
3399{
3400 unsigned int index;
3401 const cpp_token *cur_token;
3402
3403 if (pfile->in_lex_line)
3404 index = pfile->token_list.tokens_used;
3405 else
3406 index = pfile->contexts[0].posn;
3407
6ead1e99
ZW
3408 if (index == 0)
3409 {
3410 if (pcol)
3411 *pcol = 0;
3412 return 0;
3413 }
3414
041c3194
ZW
3415 cur_token = &pfile->token_list.tokens[index - 1];
3416 if (pcol)
3417 *pcol = cur_token->col;
3418 return cur_token->line;
3419}
3420
3421#define DSC(str) (const U_CHAR *)str, sizeof str - 1
3422static const char * const monthnames[] =
3423{
3424 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3425 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3426};
3427
3428/* Handle builtin macros like __FILE__. */
3429static const cpp_token *
3430special_symbol (pfile, node, token)
3431 cpp_reader *pfile;
3432 cpp_hashnode *node;
d1d9a6bd 3433 const cpp_token *token;
3fef5b2b 3434{
041c3194
ZW
3435 cpp_token *result;
3436 cpp_buffer *ip;
3fef5b2b 3437
041c3194 3438 switch (node->type)
3fef5b2b 3439 {
041c3194
ZW
3440 case T_FILE:
3441 case T_BASE_FILE:
3fef5b2b 3442 {
041c3194 3443 const char *file;
3fef5b2b 3444
041c3194
ZW
3445 ip = CPP_BUFFER (pfile);
3446 if (ip == 0)
3447 file = "";
3fef5b2b 3448 else
041c3194
ZW
3449 {
3450 if (node->type == T_BASE_FILE)
3451 while (CPP_PREV_BUFFER (ip) != NULL)
3452 ip = CPP_PREV_BUFFER (ip);
3453
3454 file = ip->nominal_fname;
3455 }
3456 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3457 strlen (file));
3458 }
3459 break;
3fef5b2b 3460
041c3194 3461 case T_INCLUDE_LEVEL:
f9a0e96c
ZW
3462 /* pfile->include_depth counts the primary source as level 1,
3463 but historically __INCLUDE_DEPTH__ has called the primary
3464 source level 0. */
3465 result = alloc_number_token (pfile, pfile->include_depth - 1);
3fef5b2b
NB
3466 break;
3467
041c3194
ZW
3468 case T_SPECLINE:
3469 /* If __LINE__ is embedded in a macro, it must expand to the
3470 line of the macro's invocation, not its definition.
3471 Otherwise things like assert() will not work properly. */
3472 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
5d7ee2fa
NB
3473 break;
3474
041c3194 3475 case T_STDC:
3fef5b2b 3476 {
041c3194 3477 int stdc = 1;
3fef5b2b 3478
041c3194
ZW
3479#ifdef STDC_0_IN_SYSTEM_HEADERS
3480 if (CPP_IN_SYSTEM_HEADER (pfile)
bfb9dc7f 3481 && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
041c3194
ZW
3482 stdc = 0;
3483#endif
3484 result = alloc_number_token (pfile, stdc);
3fef5b2b
NB
3485 }
3486 break;
3487
041c3194
ZW
3488 case T_DATE:
3489 case T_TIME:
3490 if (pfile->date == 0)
3491 {
3492 /* Allocate __DATE__ and __TIME__ from permanent storage,
3493 and save them in pfile so we don't have to do this again.
3494 We don't generate these strings at init time because
3495 time() and localtime() are very slow on some systems. */
3496 time_t tt = time (NULL);
3497 struct tm *tb = localtime (&tt);
3498
3499 pfile->date = make_string_token
3500 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3501 pfile->time = make_string_token
3502 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3503
bfb9dc7f 3504 sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
041c3194 3505 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
bfb9dc7f 3506 sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
041c3194
ZW
3507 tb->tm_hour, tb->tm_min, tb->tm_sec);
3508 }
3509 result = node->type == T_DATE ? pfile->date: pfile->time;
3fef5b2b
NB
3510 break;
3511
041c3194 3512 case T_POISON:
bfb9dc7f 3513 cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
041c3194
ZW
3514 return token;
3515
3516 default:
3517 cpp_ice (pfile, "invalid special hash type");
3518 return token;
3fef5b2b
NB
3519 }
3520
041c3194
ZW
3521 ASSIGN_FLAGS_AND_POS (result, token);
3522 return result;
3523}
3524#undef DSC
3525
61d0346d 3526/* Allocate pfile->input_buffer, and initialize _cpp_trigraph_map[]
041c3194
ZW
3527 if it hasn't happened already. */
3528
3529void
3530_cpp_init_input_buffer (pfile)
3531 cpp_reader *pfile;
3532{
417f3e3a
ZW
3533 cpp_context *base;
3534
417f3e3a
ZW
3535 _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3536 pfile->no_expand_level = UINT_MAX;
041c3194 3537 pfile->context_cap = 20;
041c3194 3538 pfile->cur_context = 0;
041c3194 3539
417f3e3a
ZW
3540 pfile->contexts = (cpp_context *)
3541 xmalloc (pfile->context_cap * sizeof (cpp_context));
041c3194 3542
417f3e3a
ZW
3543 /* Clear the base context. */
3544 base = &pfile->contexts[0];
3545 base->u.list = &pfile->token_list;
3546 base->posn = 0;
3547 base->count = 0;
3548 base->args = 0;
3549 base->level = 0;
3550 base->flags = 0;
3551 base->pushed_token = 0;
041c3194
ZW
3552}
3553
3554/* Moves to the end of the directive line, popping contexts as
3555 necessary. */
3556void
3557_cpp_skip_rest_of_line (pfile)
3558 cpp_reader *pfile;
3559{
417f3e3a
ZW
3560 /* Discard all stacked contexts. */
3561 int i;
3562 for (i = pfile->cur_context; i > 0; i--)
3563 if (pfile->contexts[i].args)
3564 free_macro_args (pfile->contexts[i].args);
3565
3566 if (pfile->no_expand_level <= pfile->cur_context)
3567 pfile->no_expand_level = 0;
3568 pfile->cur_context = 0;
041c3194 3569
417f3e3a
ZW
3570 /* Clear the base context, and clear the directive pointer so that
3571 get_raw_token will advance to the next line. */
3572 pfile->contexts[0].count = 0;
3573 pfile->contexts[0].posn = 0;
041c3194 3574 pfile->token_list.directive = 0;
c5a04734
ZW
3575}
3576
041c3194
ZW
3577/* Directive handler wrapper used by the command line option
3578 processor. */
3579void
2c8f0515 3580_cpp_run_directive (pfile, dir, buf, count, name)
041c3194
ZW
3581 cpp_reader *pfile;
3582 const struct directive *dir;
3583 const char *buf;
3584 size_t count;
2c8f0515 3585 const char *name;
041c3194
ZW
3586{
3587 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3588 {
3589 unsigned int prev_lvl = 0;
417f3e3a 3590
2c8f0515
ZW
3591 if (name)
3592 CPP_BUFFER (pfile)->nominal_fname = name;
3593 else
3594 CPP_BUFFER (pfile)->nominal_fname = _("<command line>");
3595 CPP_BUFFER (pfile)->lineno = (unsigned int)-1;
3596
417f3e3a
ZW
3597 /* Scan the line now, else prevent_macro_expansion won't work. */
3598 lex_next (pfile, 1);
041c3194
ZW
3599 if (! (dir->flags & EXPAND))
3600 prev_lvl = prevent_macro_expansion (pfile);
3601
3602 (void) (*dir->handler) (pfile);
3603
3604 if (! (dir->flags & EXPAND))
3605 restore_macro_expansion (pfile, prev_lvl);
3606
3607 _cpp_skip_rest_of_line (pfile);
3608 cpp_pop_buffer (pfile);
3609 }
3610}
This page took 0.609291 seconds and 5 git commands to generate.