]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
Make-lang.in (JAVA_SRCS): Include java-tree.h.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
041c3194
ZW
23/*
24
25Cleanups to do:-
26
041c3194 27o Check line numbers assigned to all errors.
041c3194
ZW
28o Distinguish integers, floats, and 'other' pp-numbers.
29o Store ints and char constants as binary values.
30o New command-line assertion syntax.
041c3194
ZW
31o Work towards functions in cpperror.c taking a message level parameter.
32 If we do this, merge the common code of do_warning and do_error.
33o Comment all functions, and describe macro expansion algorithm.
34o Move as much out of header files as possible.
35o Remove single quote pairs `', and some '', from diagnostics.
36o Correct pastability test for CPP_NAME and CPP_NUMBER.
37
38*/
39
45b966db
ZW
40#include "config.h"
41#include "system.h"
42#include "intl.h"
43#include "cpplib.h"
44#include "cpphash.h"
041c3194 45#include "symcat.h"
45b966db 46
0d9f234d
NB
47static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER,
48 0 UNION_INIT_ZERO};
f9a0e96c
ZW
49static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
50
51/* Flags for cpp_context. */
52#define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
53#define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
54#define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
55#define CONTEXT_ARG (1 << 3) /* If an argument context. */
56
57typedef struct cpp_context cpp_context;
58struct cpp_context
59{
60 union
61 {
62 const cpp_toklist *list; /* Used for macro contexts only. */
63 const cpp_token **arg; /* Used for arg contexts only. */
64 } u;
65
66 /* Pushed token to be returned by next call to get_raw_token. */
67 const cpp_token *pushed_token;
68
f67798e7
NB
69 struct macro_args *args; /* The arguments for a function-like
70 macro. NULL otherwise. */
f9a0e96c
ZW
71 unsigned short posn; /* Current posn, index into u. */
72 unsigned short count; /* No. of tokens in u. */
73 unsigned short level;
74 unsigned char flags;
75};
76
77typedef struct macro_args macro_args;
78struct macro_args
79{
80 unsigned int *ends;
81 const cpp_token **tokens;
82 unsigned int capacity;
83 unsigned int used;
84 unsigned short level;
85};
86
87static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
88static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
89 macro_args *, unsigned int *));
90static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
91static void save_token PARAMS ((macro_args *, const cpp_token *));
92static int pop_context PARAMS ((cpp_reader *));
93static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
94static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
95static void free_macro_args PARAMS ((macro_args *));
58fea6af 96static void dump_param_spelling PARAMS ((FILE *, const cpp_toklist *,
1368ee70 97 unsigned int));
041c3194 98static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
9e62c811 99 unsigned int));
f2d5f0cc 100
0d9f234d
NB
101static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
102static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
103static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
104
041c3194 105static int skip_block_comment PARAMS ((cpp_reader *));
0d9f234d
NB
106static int skip_line_comment PARAMS ((cpp_buffer *));
107static void adjust_column PARAMS ((cpp_reader *));
108static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
109static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
110static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t));
111static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
112static void unterminated PARAMS ((cpp_reader *, unsigned int, int));
113static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
114static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
041c3194 115static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
0d9f234d
NB
116static void check_long_token PARAMS ((cpp_buffer *,
117 cpp_token *,
118 cppchar_t,
119 enum cpp_ttype));
120static void lex_token PARAMS ((cpp_reader *, cpp_token *));
041c3194 121static int lex_next PARAMS ((cpp_reader *, int));
0d9f234d
NB
122
123static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
041c3194
ZW
124static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
125 const cpp_token *));
b8f41010 126
041c3194
ZW
127static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
128static void expand_context_stack PARAMS ((cpp_reader *));
d1d9a6bd 129static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
041c3194 130 unsigned char *));
58fea6af
ZW
131static void output_token PARAMS ((cpp_reader *, FILE *, const cpp_token *,
132 const cpp_token *, int));
b8f41010
NB
133typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
134 cpp_token *));
041c3194
ZW
135static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
136 unsigned int));
137static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
138static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
139 const cpp_token *));
140static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
141static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
142 const cpp_token *));
143static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
144 const cpp_token *, int *));
145static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
146static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
147static cpp_token *get_temp_token PARAMS ((cpp_reader *));
148static void release_temp_tokens PARAMS ((cpp_reader *));
149static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
150static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
b8f41010 151
041c3194
ZW
152#define VALID_SIGN(c, prevc) \
153 (((c) == '+' || (c) == '-') && \
154 ((prevc) == 'e' || (prevc) == 'E' \
155 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
156
f617b8e2
NB
157/* An upper bound on the number of bytes needed to spell a token,
158 including preceding whitespace. */
58fea6af
ZW
159static inline size_t TOKEN_LEN PARAMS ((const cpp_token *));
160static inline size_t
161TOKEN_LEN (token)
162 const cpp_token *token;
163{
164 size_t len;
165
166 switch (TOKEN_SPELL (token))
167 {
168 default: len = 0; break;
169 case SPELL_STRING: len = token->val.str.len; break;
170 case SPELL_IDENT: len = token->val.node->length; break;
171 }
172 return len + 5;
173}
f617b8e2 174
f9a0e96c
ZW
175#define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
176#define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
5ef865d5 177#define ON_REST_ARG(c) \
ff94c747
NB
178 (((c)->u.list->flags & VAR_ARGS) \
179 && (c)->u.list->tokens[(c)->posn - 1].val.aux \
180 == (unsigned int) ((c)->u.list->paramc - 1))
f9a0e96c
ZW
181
182#define ASSIGN_FLAGS_AND_POS(d, s) \
183 do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
184 if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
185 } while (0)
186
187/* f is flags, just consisting of PREV_WHITE | BOL. */
188#define MODIFY_FLAGS_AND_POS(d, s, f) \
189 do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
190 if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
191 } while (0)
192
96be6998
ZW
193#define OP(e, s) { SPELL_OPERATOR, U s },
194#define TK(e, s) { s, U STRINGX (e) },
b8f41010 195
041c3194 196const struct token_spelling
96be6998 197_cpp_token_spellings [N_TTYPES] = {TTYPE_TABLE };
041c3194 198
96be6998
ZW
199#undef OP
200#undef TK
b8f41010 201
f2d5f0cc
ZW
202/* Notify the compiler proper that the current line number has jumped,
203 or the current file name has changed. */
204
205static void
1368ee70 206output_line_command (pfile, print, line)
45b966db 207 cpp_reader *pfile;
f2d5f0cc 208 cpp_printer *print;
1368ee70 209 unsigned int line;
45b966db 210{
041c3194 211 cpp_buffer *ip = CPP_BUFFER (pfile);
f2d5f0cc 212
041c3194
ZW
213 if (line == 0)
214 return;
215
216 /* End the previous line of text. */
217 if (pfile->need_newline)
58fea6af
ZW
218 {
219 putc ('\n', print->outf);
220 print->lineno++;
221 }
041c3194
ZW
222 pfile->need_newline = 0;
223
f2d5f0cc
ZW
224 if (CPP_OPTION (pfile, no_line_commands))
225 return;
226
f2d5f0cc
ZW
227 /* If the current file has not changed, we can output a few newlines
228 instead if we want to increase the line number by a small amount.
229 We cannot do this if print->lineno is zero, because that means we
230 haven't output any line commands yet. (The very first line
58fea6af
ZW
231 command output is a `same_file' command.)
232
233 'nominal_fname' values are unique, so they can be compared by
234 comparing pointers. */
235 if (ip->nominal_fname == print->last_fname && print->lineno > 0
f2d5f0cc 236 && line >= print->lineno && line < print->lineno + 8)
45b966db 237 {
f2d5f0cc 238 while (line > print->lineno)
45b966db 239 {
f2d5f0cc
ZW
240 putc ('\n', print->outf);
241 print->lineno++;
45b966db 242 }
f2d5f0cc 243 return;
45b966db 244 }
f2d5f0cc 245
58fea6af
ZW
246 fprintf (print->outf, "# %u \"%s\"%s\n", line, ip->nominal_fname,
247 cpp_syshdr_flags (pfile, ip));
248
249 print->last_fname = ip->nominal_fname;
f2d5f0cc
ZW
250 print->lineno = line;
251}
252
58fea6af
ZW
253/* Like fprintf, but writes to a printer object. You should be sure
254 always to generate a complete line when you use this function. */
f2d5f0cc 255void
58fea6af
ZW
256cpp_printf VPARAMS ((cpp_reader *pfile, cpp_printer *print,
257 const char *fmt, ...))
258{
259 va_list ap;
260#ifndef ANSI_PROTOTYPES
261 cpp_reader *pfile;
262 cpp_printer *print;
263 const char *fmt;
264#endif
45b966db 265
58fea6af
ZW
266 VA_START (ap, fmt);
267
268#ifndef ANSI_PROTOTYPES
269 pfile = va_arg (ap, cpp_reader *);
270 print = va_arg (ap, cpp_printer *);
271 fmt = va_arg (ap, const char *);
272#endif
273
274 /* End the previous line of text. */
275 if (pfile->need_newline)
3cb553b4
ZW
276 {
277 putc ('\n', print->outf);
278 print->lineno++;
279 }
58fea6af
ZW
280 pfile->need_newline = 0;
281
282 vfprintf (print->outf, fmt, ap);
283 va_end (ap);
45b966db
ZW
284}
285
c56c2073 286/* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
f2d5f0cc
ZW
287
288void
289cpp_scan_buffer_nooutput (pfile)
290 cpp_reader *pfile;
291{
041c3194 292 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
2c0accc9 293 const cpp_token *token;
041c3194 294
2c0accc9 295 /* In no-output mode, we can ignore everything but directives. */
f2d5f0cc
ZW
296 for (;;)
297 {
2c0accc9
ZW
298 token = _cpp_get_token (pfile);
299
041c3194
ZW
300 if (token->type == CPP_EOF)
301 {
302 cpp_pop_buffer (pfile);
303 if (CPP_BUFFER (pfile) == stop)
304 break;
305 }
2c0accc9
ZW
306
307 if (token->type == CPP_HASH && token->flags & BOL
308 && pfile->token_list.directive)
309 {
310 process_directive (pfile, token);
311 continue;
312 }
313
041c3194 314 _cpp_skip_rest_of_line (pfile);
f2d5f0cc 315 }
f2d5f0cc
ZW
316}
317
c56c2073 318/* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
f2d5f0cc
ZW
319void
320cpp_scan_buffer (pfile, print)
321 cpp_reader *pfile;
322 cpp_printer *print;
323{
c56c2073 324 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
041c3194 325 const cpp_token *token, *prev = 0;
f2d5f0cc
ZW
326
327 for (;;)
328 {
2c0accc9 329 token = _cpp_get_token (pfile);
041c3194 330 if (token->type == CPP_EOF)
f2d5f0cc 331 {
041c3194 332 cpp_pop_buffer (pfile);
d0089985 333
041c3194 334 if (CPP_BUFFER (pfile) == stop)
f2d5f0cc 335 return;
2c0accc9 336
041c3194
ZW
337 prev = 0;
338 continue;
339 }
340
341 if (token->flags & BOL)
342 {
0d9f234d 343 output_line_command (pfile, print, token->line);
3cb553b4
ZW
344 prev = 0;
345
2c0accc9
ZW
346 if (token->type == CPP_HASH && pfile->token_list.directive)
347 {
348 process_directive (pfile, token);
349 continue;
350 }
f2d5f0cc 351 }
041c3194 352
2c0accc9 353 if (token->type != CPP_PLACEMARKER)
f9a0e96c 354 {
58fea6af
ZW
355 output_token (pfile, print->outf, token, prev, 1);
356 pfile->need_newline = 1;
f9a0e96c
ZW
357 }
358
f9a0e96c
ZW
359 prev = token;
360 }
f9a0e96c
ZW
361}
362
041c3194
ZW
363/* Helper routine used by parse_include, which can't see spell_token.
364 Reinterpret the current line as an h-char-sequence (< ... >); we are
365 looking at the first token after the <. */
366const cpp_token *
367_cpp_glue_header_name (pfile)
45b966db
ZW
368 cpp_reader *pfile;
369{
041c3194
ZW
370 const cpp_token *t;
371 cpp_token *hdr;
58fea6af
ZW
372 U_CHAR *buf, *p;
373 size_t len, avail;
374
375 avail = 40;
376 len = 0;
377 buf = xmalloc (avail);
041c3194
ZW
378
379 for (;;)
380 {
417f3e3a 381 t = _cpp_get_token (pfile);
041c3194
ZW
382 if (t->type == CPP_GREATER || t->type == CPP_EOF)
383 break;
384
58fea6af
ZW
385 if (len + TOKEN_LEN (t) > avail)
386 {
387 avail = len + TOKEN_LEN (t) + 40;
388 buf = xrealloc (buf, avail);
389 }
390
041c3194 391 if (t->flags & PREV_WHITE)
58fea6af
ZW
392 buf[len++] = ' ';
393
394 p = spell_token (pfile, t, buf + len);
395 len = (size_t) (p - buf); /* p known >= buf */
041c3194
ZW
396 }
397
398 if (t->type == CPP_EOF)
399 cpp_error (pfile, "missing terminating > character");
45b966db 400
58fea6af 401 buf = xrealloc (buf, len);
041c3194
ZW
402
403 hdr = get_temp_token (pfile);
404 hdr->type = CPP_HEADER_NAME;
405 hdr->flags = 0;
bfb9dc7f
ZW
406 hdr->val.str.text = buf;
407 hdr->val.str.len = len;
041c3194 408 return hdr;
45b966db
ZW
409}
410
1368ee70
ZW
411/* Token-buffer helper functions. */
412
d1d9a6bd
NB
413/* Expand a token list's string space. It is *vital* that
414 list->tokens_used is correct, to get pointer fix-up right. */
041c3194
ZW
415void
416_cpp_expand_name_space (list, len)
1368ee70 417 cpp_toklist *list;
c5a04734
ZW
418 unsigned int len;
419{
f617b8e2 420 const U_CHAR *old_namebuf;
f617b8e2
NB
421
422 old_namebuf = list->namebuf;
c5a04734
ZW
423 list->name_cap += len;
424 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
f617b8e2
NB
425
426 /* Fix up token text pointers. */
79f50f2a 427 if (list->namebuf != old_namebuf)
f617b8e2
NB
428 {
429 unsigned int i;
430
431 for (i = 0; i < list->tokens_used; i++)
96be6998 432 if (TOKEN_SPELL (&list->tokens[i]) == SPELL_STRING)
bfb9dc7f 433 list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
f617b8e2 434 }
1368ee70
ZW
435}
436
041c3194
ZW
437/* If there is not enough room for LEN more characters, expand the
438 list by just enough to have room for LEN characters. */
439void
440_cpp_reserve_name_space (list, len)
441 cpp_toklist *list;
442 unsigned int len;
443{
444 unsigned int room = list->name_cap - list->name_used;
445
446 if (room < len)
447 _cpp_expand_name_space (list, len - room);
448}
449
1368ee70 450/* Expand the number of tokens in a list. */
d1d9a6bd
NB
451void
452_cpp_expand_token_space (list, count)
1368ee70 453 cpp_toklist *list;
d1d9a6bd 454 unsigned int count;
1368ee70 455{
d1d9a6bd
NB
456 unsigned int n;
457
458 list->tokens_cap += count;
459 n = list->tokens_cap;
15dad1d9 460 if (list->flags & LIST_OFFSET)
d1d9a6bd 461 list->tokens--, n++;
1368ee70 462 list->tokens = (cpp_token *)
d1d9a6bd 463 xrealloc (list->tokens, n * sizeof (cpp_token));
15dad1d9
ZW
464 if (list->flags & LIST_OFFSET)
465 list->tokens++; /* Skip the dummy. */
1368ee70
ZW
466}
467
d1d9a6bd
NB
468/* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
469 an extra token in front of the token list, as this allows the lexer
470 to always peek at the previous token without worrying about
471 underflowing the list, and some initial space. Otherwise, no
472 token- or name-space is allocated, and there is no dummy token. */
15dad1d9 473void
d1d9a6bd 474_cpp_init_toklist (list, flags)
1368ee70 475 cpp_toklist *list;
d1d9a6bd 476 int flags;
1368ee70 477{
d1d9a6bd
NB
478 if (flags == NO_DUMMY_TOKEN)
479 {
480 list->tokens_cap = 0;
041c3194 481 list->tokens = 0;
d1d9a6bd 482 list->name_cap = 0;
041c3194 483 list->namebuf = 0;
d1d9a6bd
NB
484 list->flags = 0;
485 }
486 else
487 {
488 /* Initialize token space. Put a dummy token before the start
489 that will fail matches. */
490 list->tokens_cap = 256; /* 4K's worth. */
491 list->tokens = (cpp_token *)
492 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
493 list->tokens[0].type = CPP_EOF;
494 list->tokens++;
495
496 /* Initialize name space. */
497 list->name_cap = 1024;
041c3194 498 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
d1d9a6bd
NB
499 list->flags = LIST_OFFSET;
500 }
15dad1d9 501
15dad1d9
ZW
502 _cpp_clear_toklist (list);
503}
1368ee70 504
15dad1d9
ZW
505/* Clear a token list. */
506void
507_cpp_clear_toklist (list)
508 cpp_toklist *list;
509{
c5a04734
ZW
510 list->tokens_used = 0;
511 list->name_used = 0;
041c3194
ZW
512 list->directive = 0;
513 list->paramc = 0;
514 list->params_len = 0;
15dad1d9
ZW
515 list->flags &= LIST_OFFSET; /* clear all but that one */
516}
517
518/* Free a token list. Does not free the list itself, which may be
519 embedded in a larger structure. */
520void
521_cpp_free_toklist (list)
041c3194 522 const cpp_toklist *list;
15dad1d9 523{
15dad1d9
ZW
524 if (list->flags & LIST_OFFSET)
525 free (list->tokens - 1); /* Backup over dummy token. */
526 else
527 free (list->tokens);
528 free (list->namebuf);
1368ee70
ZW
529}
530
15dad1d9
ZW
531/* Compare two tokens. */
532int
533_cpp_equiv_tokens (a, b)
534 const cpp_token *a, *b;
535{
041c3194 536 if (a->type == b->type && a->flags == b->flags)
96be6998 537 switch (TOKEN_SPELL (a))
041c3194
ZW
538 {
539 default: /* Keep compiler happy. */
540 case SPELL_OPERATOR:
541 return 1;
542 case SPELL_CHAR:
543 case SPELL_NONE:
544 return a->val.aux == b->val.aux; /* arg_no or character. */
545 case SPELL_IDENT:
bfb9dc7f 546 return a->val.node == b->val.node;
041c3194 547 case SPELL_STRING:
bfb9dc7f
ZW
548 return (a->val.str.len == b->val.str.len
549 && !memcmp (a->val.str.text, b->val.str.text,
550 a->val.str.len));
041c3194 551 }
15dad1d9 552
041c3194 553 return 0;
15dad1d9
ZW
554}
555
556/* Compare two token lists. */
557int
558_cpp_equiv_toklists (a, b)
559 const cpp_toklist *a, *b;
560{
561 unsigned int i;
562
041c3194
ZW
563 if (a->tokens_used != b->tokens_used
564 || a->flags != b->flags
565 || a->paramc != b->paramc)
15dad1d9
ZW
566 return 0;
567
568 for (i = 0; i < a->tokens_used; i++)
569 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
570 return 0;
571 return 1;
572}
573
041c3194 574/* Utility routine:
9e62c811 575
bfb9dc7f
ZW
576 Compares, the token TOKEN to the NUL-terminated string STRING.
577 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
15dad1d9 578
041c3194 579int
bfb9dc7f
ZW
580cpp_ideq (token, string)
581 const cpp_token *token;
041c3194
ZW
582 const char *string;
583{
bfb9dc7f 584 if (token->type != CPP_NAME)
041c3194 585 return 0;
bfb9dc7f
ZW
586
587 return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
15dad1d9 588}
1368ee70 589
041c3194
ZW
590static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
591 U":>", U"<%", U"%>"};
45b966db 592
0d9f234d
NB
593/* Call when meeting a newline. Returns the character after the newline
594 (or carriage-return newline combination), or EOF. */
595static cppchar_t
596handle_newline (buffer, newline_char)
597 cpp_buffer *buffer;
598 cppchar_t newline_char;
599{
600 cppchar_t next = EOF;
601
602 buffer->col_adjust = 0;
603 buffer->lineno++;
604 buffer->line_base = buffer->cur;
605
606 /* Handle CR-LF and LF-CR combinations, get the next character. */
607 if (buffer->cur < buffer->rlimit)
608 {
609 next = *buffer->cur++;
610 if (next + newline_char == '\r' + '\n')
611 {
612 buffer->line_base = buffer->cur;
613 if (buffer->cur < buffer->rlimit)
614 next = *buffer->cur++;
615 else
616 next = EOF;
617 }
618 }
619
620 buffer->read_ahead = next;
621 return next;
622}
623
624/* Subroutine of skip_escaped_newlines; called when a trigraph is
625 encountered. It warns if necessary, and returns true if the
626 trigraph should be honoured. FROM_CHAR is the third character of a
627 trigraph, and presumed to be the previous character for position
628 reporting. */
45b966db 629static int
0d9f234d 630trigraph_ok (pfile, from_char)
45b966db 631 cpp_reader *pfile;
0d9f234d 632 cppchar_t from_char;
45b966db 633{
041c3194
ZW
634 int accept = CPP_OPTION (pfile, trigraphs);
635
636 if (CPP_OPTION (pfile, warn_trigraphs))
45b966db 637 {
0d9f234d 638 cpp_buffer *buffer = pfile->buffer;
041c3194 639 if (accept)
0d9f234d 640 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
041c3194 641 "trigraph ??%c converted to %c",
0d9f234d
NB
642 (int) from_char,
643 (int) _cpp_trigraph_map[from_char]);
45b966db 644 else
0d9f234d
NB
645 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
646 "trigraph ??%c ignored", (int) from_char);
45b966db 647 }
0d9f234d 648
041c3194 649 return accept;
45b966db
ZW
650}
651
0d9f234d
NB
652/* Assumes local variables buffer and result. */
653#define ACCEPT_CHAR(t) \
654 do { result->type = t; buffer->read_ahead = EOF; } while (0)
655
656/* When we move to multibyte character sets, add to these something
657 that saves and restores the state of the multibyte conversion
658 library. This probably involves saving and restoring a "cookie".
659 In the case of glibc it is an 8-byte structure, so is not a high
660 overhead operation. In any case, it's out of the fast path. */
661#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
662#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
663
664/* Skips any escaped newlines introduced by NEXT, which is either a
665 '?' or a '\\'. Returns the next character, which will also have
666 been placed in buffer->read_ahead. */
667static cppchar_t
668skip_escaped_newlines (buffer, next)
669 cpp_buffer *buffer;
670 cppchar_t next;
45b966db 671{
0d9f234d
NB
672 cppchar_t next1;
673 const unsigned char *saved_cur;
674 int space;
041c3194 675
0d9f234d 676 do
041c3194 677 {
0d9f234d
NB
678 if (buffer->cur == buffer->rlimit)
679 break;
680
681 SAVE_STATE ();
682 if (next == '?')
683 {
684 next1 = *buffer->cur++;
685 if (next1 != '?' || buffer->cur == buffer->rlimit)
686 {
687 RESTORE_STATE ();
688 break;
689 }
041c3194 690
0d9f234d
NB
691 next1 = *buffer->cur++;
692 if (!_cpp_trigraph_map[next1] || !trigraph_ok (buffer->pfile, next1))
693 {
694 RESTORE_STATE ();
695 break;
696 }
45b966db 697
0d9f234d
NB
698 /* We have a full trigraph here. */
699 next = _cpp_trigraph_map[next1];
700 if (next != '\\' || buffer->cur == buffer->rlimit)
701 break;
702 SAVE_STATE ();
703 }
704
705 /* We have a backslash, and room for at least one more character. */
706 space = 0;
707 do
708 {
709 next1 = *buffer->cur++;
710 if (!is_nvspace (next1))
711 break;
712 space = 1;
713 }
714 while (buffer->cur < buffer->rlimit);
715
716 if (!is_vspace (next1))
717 {
718 RESTORE_STATE ();
719 break;
720 }
45b966db 721
0d9f234d
NB
722 if (space)
723 cpp_warning (buffer->pfile,
724 "backslash and newline separated by space");
725
726 next = handle_newline (buffer, next1);
727 if (next == EOF)
728 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
041c3194 729 }
0d9f234d 730 while (next == '\\' || next == '?');
45b966db 731
0d9f234d
NB
732 buffer->read_ahead = next;
733 return next;
45b966db
ZW
734}
735
0d9f234d
NB
736/* Obtain the next character, after trigraph conversion and skipping
737 an arbitrary string of escaped newlines. The common case of no
738 trigraphs or escaped newlines falls through quickly. */
739static cppchar_t
740get_effective_char (buffer)
741 cpp_buffer *buffer;
64aaf407 742{
0d9f234d
NB
743 cppchar_t next = EOF;
744
745 if (buffer->cur < buffer->rlimit)
746 {
747 next = *buffer->cur++;
748
749 /* '?' can introduce trigraphs (and therefore backslash); '\\'
750 can introduce escaped newlines, which we want to skip, or
751 UCNs, which, depending upon lexer state, we will handle in
752 the future. */
753 if (next == '?' || next == '\\')
754 next = skip_escaped_newlines (buffer, next);
755 }
756
757 buffer->read_ahead = next;
758 return next;
64aaf407
NB
759}
760
0d9f234d
NB
761/* Skip a C-style block comment. We find the end of the comment by
762 seeing if an asterisk is before every '/' we encounter. Returns
763 non-zero if comment terminated by EOF, zero otherwise. */
041c3194
ZW
764static int
765skip_block_comment (pfile)
45b966db
ZW
766 cpp_reader *pfile;
767{
041c3194 768 cpp_buffer *buffer = pfile->buffer;
0d9f234d
NB
769 cppchar_t c = EOF, prevc;
770
771 while (buffer->cur != buffer->rlimit)
45b966db 772 {
0d9f234d
NB
773 prevc = c, c = *buffer->cur++;
774
775 next_char:
776 /* FIXME: For speed, create a new character class of characters
777 of no interest inside block comments. */
778 if (c == '?' || c == '\\')
779 c = skip_escaped_newlines (buffer, c);
041c3194 780
0d9f234d
NB
781 /* People like decorating comments with '*', so check for '/'
782 instead for efficiency. */
041c3194 783 if (c == '/')
45b966db 784 {
0d9f234d
NB
785 if (prevc == '*')
786 break;
041c3194 787
0d9f234d
NB
788 /* Warn about potential nested comments, but not if the '/'
789 comes immediately before the true comment delimeter.
041c3194 790 Don't bother to get it right across escaped newlines. */
0d9f234d
NB
791 if (CPP_OPTION (pfile, warn_comments)
792 && buffer->cur != buffer->rlimit)
45b966db 793 {
0d9f234d
NB
794 prevc = c, c = *buffer->cur++;
795 if (c == '*' && buffer->cur != buffer->rlimit)
796 {
797 prevc = c, c = *buffer->cur++;
798 if (c != '/')
799 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
800 CPP_BUF_COL (buffer),
801 "\"/*\" within comment");
802 }
803 goto next_char;
45b966db 804 }
45b966db 805 }
91fcd158 806 else if (is_vspace (c))
45b966db 807 {
0d9f234d
NB
808 prevc = c, c = handle_newline (buffer, c);
809 goto next_char;
45b966db 810 }
52fadca8 811 else if (c == '\t')
0d9f234d 812 adjust_column (pfile);
45b966db 813 }
041c3194 814
0d9f234d
NB
815 buffer->read_ahead = EOF;
816 return c != '/' || prevc != '*';
45b966db
ZW
817}
818
f9a0e96c 819/* Skip a C++ line comment. Handles escaped newlines. Returns
0d9f234d
NB
820 non-zero if a multiline comment. The following new line, if any,
821 is left in buffer->read_ahead. */
041c3194 822static int
0d9f234d
NB
823skip_line_comment (buffer)
824 cpp_buffer *buffer;
45b966db 825{
0d9f234d
NB
826 unsigned int orig_lineno = buffer->lineno;
827 cppchar_t c;
041c3194 828
0d9f234d 829 do
041c3194 830 {
0d9f234d
NB
831 c = EOF;
832 if (buffer->cur == buffer->rlimit)
833 break;
041c3194 834
0d9f234d
NB
835 c = *buffer->cur++;
836 if (c == '?' || c == '\\')
837 c = skip_escaped_newlines (buffer, c);
041c3194 838 }
0d9f234d 839 while (!is_vspace (c));
45b966db 840
0d9f234d
NB
841 buffer->read_ahead = c; /* Leave any newline for caller. */
842 return orig_lineno != buffer->lineno;
041c3194 843}
45b966db 844
0d9f234d
NB
845/* pfile->buffer->cur is one beyond the \t character. Update
846 col_adjust so we track the column correctly. */
52fadca8 847static void
0d9f234d 848adjust_column (pfile)
52fadca8 849 cpp_reader *pfile;
52fadca8 850{
0d9f234d
NB
851 cpp_buffer *buffer = pfile->buffer;
852 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
52fadca8
NB
853
854 /* Round it up to multiple of the tabstop, but subtract 1 since the
855 tab itself occupies a character position. */
0d9f234d
NB
856 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
857 - col % CPP_OPTION (pfile, tabstop)) - 1;
52fadca8
NB
858}
859
0d9f234d
NB
860/* Skips whitespace, saving the next non-whitespace character.
861 Adjusts pfile->col_adjust to account for tabs. Without this,
862 tokens might be assigned an incorrect column. */
041c3194 863static void
0d9f234d 864skip_whitespace (pfile, c)
041c3194 865 cpp_reader *pfile;
0d9f234d 866 cppchar_t c;
041c3194
ZW
867{
868 cpp_buffer *buffer = pfile->buffer;
0d9f234d 869 unsigned int warned = 0;
45b966db 870
0d9f234d 871 do
041c3194 872 {
91fcd158
NB
873 /* Horizontal space always OK. */
874 if (c == ' ')
0d9f234d 875 ;
91fcd158 876 else if (c == '\t')
0d9f234d
NB
877 adjust_column (pfile);
878 /* Just \f \v or \0 left. */
91fcd158 879 else if (c == '\0')
041c3194 880 {
91fcd158 881 if (!warned)
0d9f234d
NB
882 {
883 cpp_warning (pfile, "null character(s) ignored");
884 warned = 1;
885 }
45b966db 886 }
0d9f234d 887 else if (IN_DIRECTIVE (pfile) && CPP_PEDANTIC (pfile))
91fcd158
NB
888 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
889 CPP_BUF_COL (buffer),
890 "%s in preprocessing directive",
891 c == '\f' ? "form feed" : "vertical tab");
0d9f234d
NB
892
893 c = EOF;
894 if (buffer->cur == buffer->rlimit)
895 break;
896 c = *buffer->cur++;
45b966db 897 }
0d9f234d
NB
898 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
899 while (is_nvspace (c));
900
901 /* Remember the next character. */
902 buffer->read_ahead = c;
041c3194 903}
45b966db 904
0d9f234d
NB
905/* Parse an identifier, skipping embedded backslash-newlines.
906 Calculate the hash value of the token while parsing, for improved
907 performance. The hashing algorithm *must* match cpp_lookup(). */
908
909static cpp_hashnode *
910parse_identifier (pfile, c)
45b966db 911 cpp_reader *pfile;
0d9f234d 912 cppchar_t c;
45b966db 913{
0d9f234d
NB
914 cpp_buffer *buffer = pfile->buffer;
915 unsigned int r = 0, saw_dollar = 0;
916 unsigned int orig_used = pfile->token_list.name_used;
041c3194 917
0d9f234d 918 do
041c3194 919 {
0d9f234d 920 do
041c3194 921 {
0d9f234d
NB
922 if (pfile->token_list.name_used == pfile->token_list.name_cap)
923 _cpp_expand_name_space (&pfile->token_list,
924 pfile->token_list.name_used + 256);
925 pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
926 r = HASHSTEP (r, c);
45b966db 927
0d9f234d
NB
928 if (c == '$')
929 saw_dollar++;
ba89d661 930
0d9f234d
NB
931 c = EOF;
932 if (buffer->cur == buffer->rlimit)
933 break;
ba89d661 934
0d9f234d
NB
935 c = *buffer->cur++;
936 }
937 while (is_idchar (c));
ba89d661 938
0d9f234d
NB
939 /* Potential escaped newline? */
940 if (c != '?' && c != '\\')
941 break;
942 c = skip_escaped_newlines (buffer, c);
041c3194 943 }
0d9f234d
NB
944 while (is_idchar (c));
945
946 /* $ is not a identifier character in the standard, but is commonly
947 accepted as an extension. Don't warn about it in skipped
948 conditional blocks. */
949 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
950 cpp_pedwarn (pfile, "'$' character(s) in identifier");
951
952 /* Remember the next character. */
953 buffer->read_ahead = c;
954 return _cpp_lookup_with_hash (pfile, &pfile->token_list.namebuf[orig_used],
955 pfile->token_list.name_used - orig_used, r);
45b966db
ZW
956}
957
0d9f234d 958/* Parse a number, skipping embedded backslash-newlines. */
45b966db 959static void
0d9f234d 960parse_number (pfile, number, c)
45b966db 961 cpp_reader *pfile;
0d9f234d
NB
962 cpp_string *number;
963 cppchar_t c;
45b966db 964{
0d9f234d 965 cppchar_t prevc;
041c3194 966 cpp_buffer *buffer = pfile->buffer;
0d9f234d 967 unsigned int orig_used = pfile->token_list.name_used;
45b966db 968
0d9f234d 969 do
041c3194 970 {
0d9f234d
NB
971 do
972 {
973 if (pfile->token_list.name_used == pfile->token_list.name_cap)
974 _cpp_expand_name_space (&pfile->token_list,
975 pfile->token_list.name_used + 256);
976 pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
977
978 prevc = c;
979 c = EOF;
980 if (buffer->cur == buffer->rlimit)
981 break;
45b966db 982
0d9f234d
NB
983 c = *buffer->cur++;
984 }
985 while (is_numchar (c) || c == '.' || VALID_SIGN (c, prevc));
45b966db 986
0d9f234d
NB
987 /* Potential escaped newline? */
988 if (c != '?' && c != '\\')
989 break;
990 c = skip_escaped_newlines (buffer, c);
45b966db 991 }
0d9f234d
NB
992 while (is_numchar (c) || c == '.' || VALID_SIGN (c, prevc));
993
994 /* Remember the next character. */
995 buffer->read_ahead = c;
64aaf407 996
0d9f234d
NB
997 number->text = &pfile->token_list.namebuf[orig_used];
998 number->len = pfile->token_list.name_used - orig_used;
999}
1000
1001/* Subroutine of parse_string. Emits error for unterminated strings. */
1002static void
1003unterminated (pfile, line, term)
1004 cpp_reader *pfile;
1005 unsigned int line;
1006 int term;
1007{
1008 cpp_error (pfile, "missing terminating %c character", term);
1009
1010 if (term == '\"' && pfile->mls_line && pfile->mls_line != line)
041c3194 1011 {
0d9f234d
NB
1012 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_column,
1013 "possible start of unterminated string literal");
1014 pfile->mls_line = 0;
041c3194 1015 }
45b966db
ZW
1016}
1017
0d9f234d
NB
1018/* Parses a string, character constant, or angle-bracketed header file
1019 name. Handles embedded trigraphs and escaped newlines.
45b966db 1020
0d9f234d
NB
1021 Multi-line strings are allowed, but they are deprecated within
1022 directives.

   The characters up to, but not including, the closing TERMINATOR
   are appended to pfile->token_list.namebuf, and TOKEN->val.str is
   set to describe them.  On return buffer->read_ahead is EOF when
   the terminator was consumed or the buffer ran out; otherwise it
   holds the line-ending character at which the scan stopped.  */
041c3194 1023static void
0d9f234d 1024parse_string (pfile, token, terminator)
45b966db 1025 cpp_reader *pfile;
041c3194 1026 cpp_token *token;
0d9f234d 1027 cppchar_t terminator;
45b966db 1028{
041c3194 1029 cpp_buffer *buffer = pfile->buffer;
0d9f234d
NB
1030 unsigned int orig_used = pfile->token_list.name_used;
1031 cppchar_t c;
1032 unsigned int nulls = 0;
1033
1034 for (;;)
45b966db 1035 {
0d9f234d
NB
 /* Input exhausted before TERMINATOR was seen. */
1036 if (buffer->cur == buffer->rlimit)
1037 {
1038 c = EOF;
1039 unterminated (pfile, token->line, terminator);
1040 break;
1041 }
1042 c = *buffer->cur++;
1043
 /* Also entered from the bottom of the loop with the character that
    handle_newline left in read_ahead. */
1044 have_char:
1045 /* Handle trigraphs, escaped newlines etc. */
1046 if (c == '?' || c == '\\')
1047 c = skip_escaped_newlines (buffer, c);
45b966db 1048
0d9f234d 1049 if (c == terminator)
45b966db 1050 {
0d9f234d 1051 unsigned int u = pfile->token_list.name_used;
5eec0563 1052
0d9f234d
NB
1053 /* An odd number of consecutive backslashes represents an
1054 escaped terminator. */
1055 while (u > orig_used && pfile->token_list.namebuf[u - 1] == '\\')
1056 u--;
1057
 /* Even count: a genuine terminator. It is consumed and not stored,
    so no character is pending. */
1058 if ((pfile->token_list.name_used - u) % 2 == 0)
45b966db 1059 {
0d9f234d
NB
1060 c = EOF;
1061 break;
45b966db 1062 }
0d9f234d
NB
1063 }
1064 else if (is_vspace (c))
1065 {
1066 /* In assembly language, silently terminate string and
1067 character literals at end of line. This is a kludge
1068 around not knowing where comments are. */
1069 if (CPP_OPTION (pfile, lang_asm) && terminator != '>')
1070 break;
45b966db 1071
0d9f234d
NB
1072 /* Character constants and header names may not extend over
1073 multiple lines. In Standard C, neither may strings.
1074 Unfortunately, we accept multiline strings as an
1075 extension. (Deprecatedly even in directives - otherwise,
1076 glibc's longlong.h breaks.) */
1077 if (terminator != '"')
45b966db 1078 {
0d9f234d
NB
1079 unterminated (pfile, token->line, terminator);
1080 break;
45b966db 1081 }
45b966db 1082
0d9f234d
NB
 /* Record where the first multi-line string began, so that
    unterminated () can point at it later. */
1083 if (pfile->mls_line == 0)
1084 {
1085 pfile->mls_line = token->line;
1086 pfile->mls_column = token->col;
1087 if (CPP_PEDANTIC (pfile))
1088 cpp_pedwarn (pfile, "multi-line string constant");
041c3194 1089 }
0d9f234d
NB
1090
1091 handle_newline (buffer, c); /* Stores to read_ahead. */
 /* Whatever line ending was seen, a single '\n' is stored. */
1092 c = '\n';
1093 }
1094 else if (c == '\0')
1095 {
 /* Warn only for the first NUL in this literal. */
1096 if (nulls++ == 0)
1097 cpp_warning (pfile, "null character(s) preserved in literal");
45b966db 1098 }
45b966db 1099
0d9f234d
NB
1100 if (pfile->token_list.name_used == pfile->token_list.name_cap)
1101 _cpp_expand_name_space (&pfile->token_list,
1102 pfile->token_list.name_used + 256);
476f2869 1103
0d9f234d
NB
1104 pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
1105 /* If we had a new line, the next character is in read_ahead. */
1106 if (c != '\n')
1107 continue;
1108 c = buffer->read_ahead;
1109 if (c != EOF)
1110 goto have_char;
45b966db
ZW
1111 }
1112
 /* Every exit from the loop leaves in C the value read_ahead must hold. */
0d9f234d 1113 buffer->read_ahead = c;
45b966db 1114
0d9f234d
NB
1115 token->val.str.text = &pfile->token_list.namebuf[orig_used];
1116 token->val.str.len = pfile->token_list.name_used - orig_used;
1117}
041c3194 1118
0d9f234d
NB
1119/* For output routine simplicity, the stored comment includes the
1120 comment start and any terminator. */
9e62c811 1121static void
0d9f234d
NB
1122save_comment (pfile, token, from)
1123 cpp_reader *pfile;
041c3194
ZW
1124 cpp_token *token;
1125 const unsigned char *from;
9e62c811 1126{
041c3194 1127 unsigned char *buffer;
0d9f234d
NB
1128 unsigned int len;
1129 cpp_toklist *list = &pfile->token_list;
1130
1131#define COMMENT_START_LEN 2
1132 len = pfile->buffer->cur - from + COMMENT_START_LEN;
1133 _cpp_reserve_name_space (list, len);
1134 buffer = list->namebuf + list->name_used;
1135 list->name_used += len;
041c3194 1136
041c3194 1137 token->type = CPP_COMMENT;
bfb9dc7f 1138 token->val.str.len = len;
0d9f234d 1139 token->val.str.text = buffer;
45b966db 1140
0d9f234d
NB
1141 /* from[-1] is '/' or '*' depending on the comment type. */
1142 *buffer++ = '/';
1143 *buffer++ = from[-1];
1144 memcpy (buffer, from, len - COMMENT_START_LEN);
1145}
45b966db 1146
0d9f234d
NB
1147/* A helper routine for lex_token. With some long tokens, we need
1148 to read ahead to see if that is the token we have, but back-track
1149 if not.

   If the next effective character in BUFFER equals WANTED, it is
   accepted with ACCEPT_CHAR (TYPE).  Otherwise RESTORE_STATE is
   applied and buffer->read_ahead is put back to the value it had on
   entry.  */
1150static void
1151check_long_token (buffer, result, wanted, type)
1152 cpp_buffer *buffer;
1153 cpp_token *result;
1154 cppchar_t wanted;
1155 enum cpp_ttype type;
1156{
 /* NOTE(review): saved_cur is presumably written by SAVE_STATE and read
    by RESTORE_STATE, and RESULT presumably used by ACCEPT_CHAR; the
    macro definitions are outside this chunk - confirm. */
1157 const unsigned char *saved_cur;
 /* Remember the pending read-ahead character so it can be reinstated. */
1158 cppchar_t c = buffer->read_ahead;
1159
1160 SAVE_STATE ();
1161 if (get_effective_char (buffer) == wanted)
1162 ACCEPT_CHAR (type);
041c3194 1163 else
ea4a453b 1164 {
0d9f234d
NB
1165 /* Restore state. */
1166 RESTORE_STATE ();
1167 buffer->read_ahead = c;
ea4a453b 1168 }
45b966db
ZW
1169}
1170
/* Lex a single token from PFILE's current buffer into RESULT.
   Sets RESULT's flags, line, col and type, and its val for names,
   numbers, string-like tokens, saved comments and CPP_OTHER
   characters.  Horizontal whitespace is skipped, as are comments
   unless pfile->state.save_comments is set; both set PREV_WHITE on
   the token that follows.  A newline yields CPP_EOF unless
   pfile->state.skip_newlines is set, in which case lexing carries on
   with the next line.  End of buffer also yields CPP_EOF.  */
041c3194 1171static void
0d9f234d 1172lex_token (pfile, result)
45b966db 1173 cpp_reader *pfile;
0d9f234d 1174 cpp_token *result;
45b966db 1175{
0d9f234d 1176 cppchar_t c;
041c3194 1177 cpp_buffer *buffer = pfile->buffer;
0d9f234d 1178 const unsigned char *comment_start;
9ec7291f 1179
0d9f234d
NB
1180 result->flags = 0;
 /* next_char refreshes both line and column; next_char2 only the column. */
1181 next_char:
1182 result->line = CPP_BUF_LINE (buffer);
1183 next_char2:
1184 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
041c3194 1185
0d9f234d
NB
 /* Prefer a pending read-ahead character; otherwise fetch one from the
    buffer if any remain. C stays EOF when the buffer is exhausted. */
1186 c = buffer->read_ahead;
1187 if (c == EOF && buffer->cur < buffer->rlimit)
1188 {
1189 c = *buffer->cur++;
1190 result->col++;
1191 }
45b966db 1192
0d9f234d
NB
1193 do_switch:
1194 buffer->read_ahead = EOF;
1195 switch (c)
45b966db 1196 {
0d9f234d
NB
1197 case EOF:
1198 /* Non-empty files should end in a newline. Testing
1199 skip_newlines ensures we only emit the warning once. */
1200 if (buffer->cur != buffer->line_base && buffer->cur != buffer->buf
1201 && pfile->state.skip_newlines)
1202 cpp_pedwarn_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer),
1203 "no newline at end of file");
1204 result->type = CPP_EOF;
1205 break;
45b966db 1206
0d9f234d
NB
1207 case ' ': case '\t': case '\f': case '\v': case '\0':
1208 skip_whitespace (pfile, c);
1209 result->flags |= PREV_WHITE;
1210 goto next_char2;
1211
1212 case '\n': case '\r':
1213 result->type = CPP_EOF;
1214 handle_newline (buffer, c);
1215 /* Handling here will change significantly when moving to
1216 token-at-a-time. */
1217 if (pfile->state.skip_newlines)
45b966db 1218 {
0d9f234d
NB
1219 result->flags &= ~PREV_WHITE; /* Clear any whitespace flag. */
1220 goto next_char;
45b966db 1221 }
0d9f234d 1222 break;
46d07497 1223
0d9f234d
NB
1224 case '?':
1225 case '\\':
1226 /* These could start an escaped newline, or '?' a trigraph. Let
1227 skip_escaped_newlines do all the work. */
1228 {
1229 unsigned int lineno = buffer->lineno;
1230
1231 c = skip_escaped_newlines (buffer, c);
1232 if (lineno != buffer->lineno)
1233 /* We had at least one escaped newline of some sort, and the
1234 next character is in buffer->read_ahead. Update the
1235 token's line and column. */
1236 goto next_char;
1237
1238 /* We are either the original '?' or '\\', or a trigraph. */
1239 result->type = CPP_QUERY;
1240 buffer->read_ahead = EOF;
1241 if (c == '\\')
1242 result->type = CPP_BACKSLASH;
 /* Any other character here came from a trigraph: lex it afresh. */
1243 else if (c != '?')
1244 goto do_switch;
1245 }
1246 break;
46d07497 1247
0d9f234d
NB
1248 make_number:
1249 case '0': case '1': case '2': case '3': case '4':
1250 case '5': case '6': case '7': case '8': case '9':
1251 result->type = CPP_NUMBER;
1252 parse_number (pfile, &result->val.str, c);
1253 break;
46d07497 1254
0d9f234d
NB
1255 case '$':
1256 if (!CPP_OPTION (pfile, dollars_in_ident))
1257 goto random_char;
1258 /* Fall through... */
1259
1260 case '_':
1261 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1262 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1263 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1264 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1265 case 'y': case 'z':
1266 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1267 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1268 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1269 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1270 case 'Y': case 'Z':
1271 result->type = CPP_NAME;
1272 result->val.node = parse_identifier (pfile, c);
1273
1274 /* 'L' may introduce wide characters or strings. */
1275 if (result->val.node == pfile->spec_nodes->n_L)
1276 {
1277 c = buffer->read_ahead; /* For make_string. */
1278 if (c == '\'' || c == '"')
ba89d661 1279 {
0d9f234d
NB
1280 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1281 goto make_string;
ba89d661 1282 }
0d9f234d
NB
1283 }
1284 /* Convert named operators to their proper types. */
1285 else if (result->val.node->type == T_OPERATOR)
1286 {
1287 result->flags |= NAMED_OP;
1288 result->type = result->val.node->value.code;
1289 }
1290 break;
1291
1292 case '\'':
1293 case '"':
1294 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
 /* Entered with result->type already set and C holding the terminator. */
1295 make_string:
1296 parse_string (pfile, result, c);
1297 break;
041c3194 1298
0d9f234d
NB
1299 case '/':
1300 result->type = CPP_DIV;
1301 c = get_effective_char (buffer);
1302 if (c == '=')
1303 ACCEPT_CHAR (CPP_DIV_EQ);
1304 else if (c == '*')
1305 {
1306 comment_start = buffer->cur;
45b966db 1307
0d9f234d
NB
1308 /* Skip_block_comment updates buffer->read_ahead. */
1309 if (skip_block_comment (pfile))
1310 cpp_error_with_line (pfile, result->line, result->col,
1311 "unterminated comment");
1312 if (!pfile->state.save_comments)
92936ecf 1313 {
0d9f234d
NB
1314 result->flags |= PREV_WHITE;
1315 goto next_char;
92936ecf
ZW
1316 }
1317
0d9f234d
NB
1318 /* Save the comment as a token in its own right. */
1319 save_comment (pfile, result, comment_start);
1320 }
1321 else if (c == '/')
1322 {
1323 /* We silently allow C++ comments in system headers,
1324 irrespective of conformance mode, because lots of
1325 broken systems do that and trying to clean it up in
1326 fixincludes is a nightmare. */
1327 if (CPP_IN_SYSTEM_HEADER (pfile))
1328 goto do_line_comment;
1329 if (CPP_OPTION (pfile, cplusplus_comments))
041c3194 1330 {
0d9f234d
NB
1331 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1332 && ! buffer->warned_cplusplus_comments)
041c3194 1333 {
0d9f234d
NB
1334 cpp_pedwarn (pfile,
1335 "C++ style comments are not allowed in ISO C89");
1336 cpp_pedwarn (pfile,
1337 "(this will be reported only once per input file)");
1338 buffer->warned_cplusplus_comments = 1;
041c3194 1339 }
041c3194 1340
0d9f234d
NB
1341 do_line_comment:
1342 comment_start = buffer->cur;
1343
1344 /* Skip_line_comment updates buffer->read_ahead. */
1345 if (skip_line_comment (buffer))
1346 cpp_warning_with_line (pfile, result->line, result->col,
1347 "multi-line comment");
1348
1349 if (!pfile->state.save_comments)
041c3194 1350 {
0d9f234d
NB
1351 result->flags |= PREV_WHITE;
1352 goto next_char;
041c3194 1353 }
0d9f234d
NB
1354
1355 /* Save the comment as a token in its own right. */
1356 save_comment (pfile, result, comment_start);
45b966db 1357 }
0d9f234d
NB
1358 }
1359 break;
1360
1361 case '<':
1362 if (pfile->state.angled_headers)
1363 {
1364 result->type = CPP_HEADER_NAME;
1365 c = '>'; /* terminator. */
1366 goto make_string;
1367 }
45b966db 1368
0d9f234d
NB
1369 result->type = CPP_LESS;
1370 c = get_effective_char (buffer);
1371 if (c == '=')
1372 ACCEPT_CHAR (CPP_LESS_EQ);
1373 else if (c == '<')
1374 {
1375 ACCEPT_CHAR (CPP_LSHIFT);
1376 if (get_effective_char (buffer) == '=')
1377 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1378 }
1379 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1380 {
1381 ACCEPT_CHAR (CPP_MIN);
1382 if (get_effective_char (buffer) == '=')
1383 ACCEPT_CHAR (CPP_MIN_EQ);
1384 }
1385 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1386 {
1387 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1388 result->flags |= DIGRAPH;
1389 }
1390 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1391 {
1392 ACCEPT_CHAR (CPP_OPEN_BRACE);
1393 result->flags |= DIGRAPH;
1394 }
1395 break;
1396
1397 case '>':
1398 result->type = CPP_GREATER;
1399 c = get_effective_char (buffer);
1400 if (c == '=')
1401 ACCEPT_CHAR (CPP_GREATER_EQ);
1402 else if (c == '>')
1403 {
1404 ACCEPT_CHAR (CPP_RSHIFT);
1405 if (get_effective_char (buffer) == '=')
1406 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1407 }
1408 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1409 {
1410 ACCEPT_CHAR (CPP_MAX);
1411 if (get_effective_char (buffer) == '=')
1412 ACCEPT_CHAR (CPP_MAX_EQ);
1413 }
1414 break;
1415
1416 case '.':
1417 {
1418 const unsigned char *saved_cur;
1419 cppchar_t c1;
1420
1421 /* Save state to avoid needing to pass 2 chars to parse_number. */
1422 SAVE_STATE ();
1423 c1 = get_effective_char (buffer);
1424 /* All known character sets have 0...9 contiguous. */
1425 if (c1 >= '0' && c1 <= '9')
1426 {
1427 RESTORE_STATE ();
1428 goto make_number;
1429 }
1430
1431 result->type = CPP_DOT;
1432 if (c1 == '.')
1433 {
1434 if (get_effective_char (buffer) == '.')
1435 ACCEPT_CHAR (CPP_ELLIPSIS);
1436 else
1437 {
 /* Only two dots: back up so the second one is lexed as its own token. */
1438 buffer->read_ahead = EOF;
1439 RESTORE_STATE ();
1440 }
1441 }
1442 else if (c1 == '*' && CPP_OPTION (pfile, cplusplus))
1443 ACCEPT_CHAR (CPP_DOT_STAR);
1444 }
1445 break;
1446
1447 case '%':
1448 result->type = CPP_MOD;
1449 c = get_effective_char (buffer);
1450 if (c == '=')
1451 ACCEPT_CHAR (CPP_MOD_EQ);
1452 else if (CPP_OPTION (pfile, digraphs))
1453 {
1454 if (c == ':')
45b966db 1455 {
0d9f234d
NB
1456 result->flags |= DIGRAPH;
1457 ACCEPT_CHAR (CPP_HASH);
 /* "%:%:" is the digraph spelling of the paste operator. */
1458 if (get_effective_char (buffer) == '%')
1459 check_long_token (buffer, result, ':', CPP_PASTE);
041c3194 1460 }
0d9f234d 1461 else if (c == '>')
041c3194 1462 {
0d9f234d
NB
1463 result->flags |= DIGRAPH;
1464 ACCEPT_CHAR (CPP_CLOSE_BRACE);
f8f769ea 1465 }
0d9f234d
NB
1466 }
1467 break;
45b966db 1468
0d9f234d
NB
1469 case '+':
1470 result->type = CPP_PLUS;
1471 c = get_effective_char (buffer);
1472 if (c == '=')
1473 ACCEPT_CHAR (CPP_PLUS_EQ);
1474 else if (c == '+')
1475 ACCEPT_CHAR (CPP_PLUS_PLUS);
1476 break;
04e3ec78 1477
0d9f234d
NB
1478 case '-':
1479 result->type = CPP_MINUS;
1480 c = get_effective_char (buffer);
1481 if (c == '>')
1482 {
1483 ACCEPT_CHAR (CPP_DEREF);
1484 if (CPP_OPTION (pfile, cplusplus)
1485 && get_effective_char (buffer) == '*')
1486 ACCEPT_CHAR (CPP_DEREF_STAR);
1487 }
1488 else if (c == '=')
1489 ACCEPT_CHAR (CPP_MINUS_EQ);
1490 else if (c == '-')
1491 ACCEPT_CHAR (CPP_MINUS_MINUS);
1492 break;
45b966db 1493
0d9f234d
NB
1494 case '*':
1495 result->type = CPP_MULT;
1496 if (get_effective_char (buffer) == '=')
1497 ACCEPT_CHAR (CPP_MULT_EQ);
1498 break;
04e3ec78 1499
0d9f234d
NB
1500 case '=':
1501 result->type = CPP_EQ;
1502 if (get_effective_char (buffer) == '=')
1503 ACCEPT_CHAR (CPP_EQ_EQ);
1504 break;
f8f769ea 1505
0d9f234d
NB
1506 case '!':
1507 result->type = CPP_NOT;
1508 if (get_effective_char (buffer) == '=')
1509 ACCEPT_CHAR (CPP_NOT_EQ);
1510 break;
45b966db 1511
0d9f234d
NB
1512 case '&':
1513 result->type = CPP_AND;
1514 c = get_effective_char (buffer);
1515 if (c == '=')
1516 ACCEPT_CHAR (CPP_AND_EQ);
1517 else if (c == '&')
1518 ACCEPT_CHAR (CPP_AND_AND);
1519 break;
1520
1521 case '#':
1522 result->type = CPP_HASH;
1523 if (get_effective_char (buffer) == '#')
1524 ACCEPT_CHAR (CPP_PASTE);
1525 break;
45b966db 1526
0d9f234d
NB
1527 case '|':
1528 result->type = CPP_OR;
1529 c = get_effective_char (buffer);
1530 if (c == '=')
1531 ACCEPT_CHAR (CPP_OR_EQ);
1532 else if (c == '|')
1533 ACCEPT_CHAR (CPP_OR_OR);
1534 break;
45b966db 1535
0d9f234d
NB
1536 case '^':
1537 result->type = CPP_XOR;
1538 if (get_effective_char (buffer) == '=')
1539 ACCEPT_CHAR (CPP_XOR_EQ);
1540 break;
45b966db 1541
0d9f234d
NB
1542 case ':':
1543 result->type = CPP_COLON;
1544 c = get_effective_char (buffer);
1545 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1546 ACCEPT_CHAR (CPP_SCOPE);
1547 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1548 {
1549 result->flags |= DIGRAPH;
1550 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1551 }
1552 break;
45b966db 1553
0d9f234d
NB
1554 case '~': result->type = CPP_COMPL; break;
1555 case ',': result->type = CPP_COMMA; break;
1556 case '(': result->type = CPP_OPEN_PAREN; break;
1557 case ')': result->type = CPP_CLOSE_PAREN; break;
1558 case '[': result->type = CPP_OPEN_SQUARE; break;
1559 case ']': result->type = CPP_CLOSE_SQUARE; break;
1560 case '{': result->type = CPP_OPEN_BRACE; break;
1561 case '}': result->type = CPP_CLOSE_BRACE; break;
1562 case ';': result->type = CPP_SEMICOLON; break;
1563
1564 case '@':
1565 if (CPP_OPTION (pfile, objc))
1566 {
1567 /* In Objective C, '@' may begin keywords or strings, like
1568 @keyword or @"string". It would be nice to call
1569 get_effective_char here and test the result. However, we
1570 would then need to pass 2 characters to parse_identifier,
1571 making it ugly and slowing down its main loop. Instead,
1572 we assume we have an identifier, and recover if not. */
1573 result->type = CPP_NAME;
1574 result->val.node = parse_identifier (pfile, c);
1575 if (result->val.node->length != 1)
1576 break;
04e3ec78 1577
0d9f234d
NB
1578 /* OK, so it wasn't an identifier. Maybe a string? */
1579 if (buffer->read_ahead == '"')
041c3194 1580 {
0d9f234d
NB
1581 c = '"';
1582 ACCEPT_CHAR (CPP_OSTRING);
1583 goto make_string;
041c3194 1584 }
0d9f234d
NB
1585 }
1586 goto random_char;
1587
 /* Anything not recognised above becomes a CPP_OTHER token carrying the
    character itself. */
1588 random_char:
1589 default:
1590 result->type = CPP_OTHER;
1591 result->val.aux = c;
1592 break;
1593 }
1594}
1595
1596/*
1597 * The tokenizer's main loop. Returns a token list, representing a
1598 * logical line in the input file. On EOF after some tokens have
1599 * been processed, we return immediately. Then in next call, or if
1600 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1601 * token is placed in the list.
 *
 * Tokens are appended to LIST by repeated calls to lex_token until a
 * CPP_EOF token is produced.  LIST must have LIST_OFFSET set, or the
 * function aborts.  A line whose first token is CPP_HASH is treated
 * as a directive: comments are not saved on it, and its second token
 * is passed to _cpp_check_linemarker or _cpp_check_directive to set
 * LIST->directive.  A directive line that is not recognised is
 * discarded and the next line lexed in its place, except in skipped
 * blocks and in assembly source.
1602 */
1603
1604static void
1605lex_line (pfile, list)
1606 cpp_reader *pfile;
1607 cpp_toklist *list;
1608{
1609 unsigned int first_token;
1610 cpp_token *cur_token, *first;
1611 cpp_buffer *buffer = pfile->buffer;
45b966db 1612
0d9f234d
NB
1613 if (!(list->flags & LIST_OFFSET))
1614 (abort) ();
1615
1616 pfile->state.in_lex_line = 1;
 /* Nothing has been read from this buffer yet. */
1617 if (pfile->buffer->cur == pfile->buffer->buf)
1618 list->flags |= BEG_OF_FILE;
1619
 /* Re-entered after an unrecognised directive line has been thrown away. */
1620 retry:
1621 pfile->state.in_directive = 0;
1622 pfile->state.angled_headers = 0;
1623 pfile->state.skip_newlines = 1;
1624 pfile->state.save_comments = ! CPP_OPTION (pfile, discard_comments);
1625 first_token = list->tokens_used;
1626 list->file = buffer->nominal_fname;
1627
1628 do
1629 {
1630 if (list->tokens_used >= list->tokens_cap)
1631 _cpp_expand_token_space (list, 256);
1632
1633 cur_token = list->tokens + list->tokens_used;
1634 lex_token (pfile, cur_token);
1635
 /* skip_newlines is still set only for the first token of the line. */
1636 if (pfile->state.skip_newlines)
1637 {
1638 pfile->state.skip_newlines = 0;
1639 list->line = buffer->lineno;
1640 if (cur_token->type == CPP_HASH)
041c3194 1641 {
0d9f234d
NB
1642 pfile->state.in_directive = 1;
1643 pfile->state.save_comments = 0;
1644 pfile->state.indented = cur_token->flags & PREV_WHITE;
041c3194 1645 }
0d9f234d
NB
1646 /* 6.10.3.10: Within the sequence of preprocessing tokens
1647 making up the invocation of a function-like macro, new
1648 line is considered a normal white-space character. */
1649 else if (first_token != 0)
1650 cur_token->flags |= PREV_WHITE;
1651 }
 /* The token right after the '#': work out which directive this is. */
1652 else if (IN_DIRECTIVE (pfile) && list->tokens_used == first_token + 1)
1653 {
1654 if (cur_token->type == CPP_NUMBER)
1655 list->directive = _cpp_check_linemarker (pfile, cur_token);
041c3194 1656 else
0d9f234d 1657 list->directive = _cpp_check_directive (pfile, cur_token);
041c3194 1658 }
6d2c2047 1659
0d9f234d
NB
1660 /* _cpp_get_line assumes list->tokens_used refers to the current
1661 token being lexed. So do this after _cpp_check_directive to
1662 get the warnings therein correct. */
1663 list->tokens_used++;
041c3194 1664 }
0d9f234d 1665 while (cur_token->type != CPP_EOF);
6d2c2047 1666
041c3194
ZW
1667 /* All tokens are allocated, so the memory location is fixed. */
1668 first = &list->tokens[first_token];
0d9f234d
NB
1669 first->flags |= BOL;
1670 pfile->first_directive_token = first;
1671
041c3194
ZW
1672 /* Don't complain about the null directive, nor directives in
1673 assembly source: we don't know where the comments are, and # may
1674 introduce assembler pseudo-ops. Don't complain about invalid
1675 directives in skipped conditional groups (6.10 p4). */
0d9f234d
NB
1676 if (IN_DIRECTIVE (pfile) && !KNOWN_DIRECTIVE (list) && !pfile->skipping
1677 && !CPP_OPTION (pfile, lang_asm))
041c3194 1678 {
0d9f234d
NB
 /* cur_token is the closing CPP_EOF, so this holds only when something
    follows the '#'. */
1679 if (cur_token > first + 1)
1680 {
1681 if (first[1].type == CPP_NAME)
1682 cpp_error_with_line (pfile, first->line, first->col,
1683 "invalid preprocessing directive #%s",
1684 first[1].val.node->name);
1685 else
1686 cpp_error_with_line (pfile, first->line, first->col,
1687 "invalid preprocessing directive");
1688 }
9ec7291f
ZW
1689
1690 /* Discard this line to prevent further errors from cc1. */
1691 _cpp_clear_toklist (list);
1692 goto retry;
041c3194
ZW
1693 }
1694
7eea5554
NB
1695 /* Drop the EOF unless really at EOF or in a directive. */
1696 if (cur_token != first && !KNOWN_DIRECTIVE (list)
1697 && pfile->done_initializing)
1698 list->tokens_used--;
1699
0d9f234d 1700 pfile->state.in_lex_line = 0;
6d2c2047
ZW
1701}
1702
041c3194 1703/* Write the spelling of a token TOKEN, with any appropriate
58fea6af
ZW
1704 whitespace before it, to FP. PREV is the previous token, which
1705 is used to determine if we need to shove in an extra space in order
1706 to avoid accidental token paste. If WHITE is 0, do not insert any
1707 leading whitespace. */
041c3194 1708static void
58fea6af 1709output_token (pfile, fp, token, prev, white)
041c3194 1710 cpp_reader *pfile;
58fea6af 1711 FILE *fp;
041c3194 1712 const cpp_token *token, *prev;
58fea6af 1713 int white;
041c3194 1714{
58fea6af 1715 if (white)
041c3194 1716 {
58fea6af
ZW
1717 int dummy;
1718
1719 if (token->col && (token->flags & BOL))
1720 {
1721 /* Supply enough whitespace to put this token in its original
1722 column. Don't bother trying to reconstruct tabs; we can't
1723 get it right in general, and nothing ought to care. (Yes,
1724 some things do care; the fault lies with them.) */
1725 unsigned int spaces = token->col - 1;
1726
1727 while (spaces--)
1728 putc (' ', fp);
1729 }
1730 else if (token->flags & PREV_WHITE)
1731 putc (' ', fp);
1732 else
1733 /* Check for and prevent accidental token pasting.
1734 In addition to the cases handled by can_paste, consider
1735
1736 a + ++b - if there is not a space between the + and ++, it
1737 will be misparsed as a++ + b. But + ## ++ doesn't produce
1738 a valid token. */
1739 if (prev
1740 && (can_paste (pfile, prev, token, &dummy) != CPP_EOF
1741 || (prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1742 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS)))
1743 putc (' ', fp);
041c3194 1744 }
58fea6af
ZW
1745
1746 switch (TOKEN_SPELL (token))
041c3194 1747 {
58fea6af
ZW
1748 case SPELL_OPERATOR:
1749 {
1750 const unsigned char *spelling;
1751
1752 if (token->flags & DIGRAPH)
1753 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1754 else if (token->flags & NAMED_OP)
1755 goto spell_ident;
1756 else
1757 spelling = TOKEN_NAME (token);
1758
1759 ufputs (spelling, fp);
1760 }
1761 break;
1762
1763 case SPELL_IDENT:
1764 spell_ident:
1765 ufputs (token->val.node->name, fp);
1766 break;
1767
1768 case SPELL_STRING:
1769 {
ba89d661
ZW
1770 int left, right, tag;
1771 switch (token->type)
1772 {
1773 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1774 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1775 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1776 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1777 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1778 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1779 default: left = '\0'; right = '\0'; tag = '\0'; break;
1780 }
1781 if (tag) putc (tag, fp);
1782 if (left) putc (left, fp);
58fea6af 1783 fwrite (token->val.str.text, 1, token->val.str.len, fp);
ba89d661 1784 if (right) putc (right, fp);
58fea6af
ZW
1785 }
1786 break;
1787
1788 case SPELL_CHAR:
1789 putc (token->val.aux, fp);
1790 break;
1791
1792 case SPELL_NONE:
1793 /* Placemarker or EOF - no output. (Macro args are handled
1794 elsewhere. */
1795 break;
041c3194 1796 }
58fea6af
ZW
1797}
1798
1799/* Dump the original user's spelling of argument index ARG_NO to the
1800 macro whose expansion is LIST. */
1801static void
1802dump_param_spelling (fp, list, arg_no)
1803 FILE *fp;
1804 const cpp_toklist *list;
1805 unsigned int arg_no;
1806{
1807 const U_CHAR *param = list->namebuf;
1808
1809 while (arg_no--)
1810 param += ustrlen (param) + 1;
1811 ufputs (param, fp);
1812}
1813
1814/* Output all the tokens of LIST, starting at TOKEN, to FP. */
1815void
1816cpp_output_list (pfile, fp, list, token)
1817 cpp_reader *pfile;
1818 FILE *fp;
1819 const cpp_toklist *list;
1820 const cpp_token *token;
1821{
1822 const cpp_token *limit = list->tokens + list->tokens_used;
1823 const cpp_token *prev = 0;
1824 int white = 0;
d6d5f795 1825
58fea6af
ZW
1826 while (token < limit)
1827 {
1828 /* XXX Find some way we can write macro args from inside
1829 output_token/spell_token. */
1830 if (token->type == CPP_MACRO_ARG)
1831 {
1832 if (white && token->flags & PREV_WHITE)
1833 putc (' ', fp);
1834 if (token->flags & STRINGIFY_ARG)
1835 putc ('#', fp);
1836 dump_param_spelling (fp, list, token->val.aux);
1837 }
1838 else
1839 output_token (pfile, fp, token, prev, white);
1840 if (token->flags & PASTE_LEFT)
1841 fputs (" ##", fp);
1842 prev = token;
1843 token++;
1844 white = 1;
1845 }
041c3194 1846}
d6d5f795 1847
58fea6af 1848
041c3194 1849/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1850 already contain the enough space to hold the token's spelling.
1851 Returns a pointer to the character after the last character
1852 written. */
d6d5f795 1853
041c3194
ZW
1854static unsigned char *
1855spell_token (pfile, token, buffer)
1856 cpp_reader *pfile; /* Would be nice to be rid of this... */
1857 const cpp_token *token;
1858 unsigned char *buffer;
1859{
96be6998 1860 switch (TOKEN_SPELL (token))
041c3194
ZW
1861 {
1862 case SPELL_OPERATOR:
1863 {
1864 const unsigned char *spelling;
1865 unsigned char c;
d6d5f795 1866
041c3194
ZW
1867 if (token->flags & DIGRAPH)
1868 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
92936ecf
ZW
1869 else if (token->flags & NAMED_OP)
1870 goto spell_ident;
041c3194 1871 else
96be6998 1872 spelling = TOKEN_NAME (token);
041c3194
ZW
1873
1874 while ((c = *spelling++) != '\0')
1875 *buffer++ = c;
1876 }
1877 break;
d6d5f795 1878
041c3194 1879 case SPELL_IDENT:
92936ecf 1880 spell_ident:
bfb9dc7f
ZW
1881 memcpy (buffer, token->val.node->name, token->val.node->length);
1882 buffer += token->val.node->length;
041c3194 1883 break;
d6d5f795 1884
041c3194
ZW
1885 case SPELL_STRING:
1886 {
ba89d661
ZW
1887 int left, right, tag;
1888 switch (token->type)
1889 {
1890 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1891 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1892 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1893 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1894 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1895 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1896 default: left = '\0'; right = '\0'; tag = '\0'; break;
1897 }
1898 if (tag) *buffer++ = tag;
1899 if (left) *buffer++ = left;
bfb9dc7f
ZW
1900 memcpy (buffer, token->val.str.text, token->val.str.len);
1901 buffer += token->val.str.len;
ba89d661 1902 if (right) *buffer++ = right;
041c3194
ZW
1903 }
1904 break;
d6d5f795 1905
041c3194
ZW
1906 case SPELL_CHAR:
1907 *buffer++ = token->val.aux;
1908 break;
d6d5f795 1909
041c3194 1910 case SPELL_NONE:
96be6998 1911 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1912 break;
1913 }
d6d5f795 1914
041c3194
ZW
1915 return buffer;
1916}
d6d5f795 1917
f67798e7
NB
1918/* Macro expansion algorithm.
1919
1920Macro expansion is implemented by a single-pass algorithm; there are
1921no rescan passes involved. cpp_get_token expands just enough to be
1922able to return a token to the caller, a consequence is that when it
1923returns the preprocessor can be in a state of mid-expansion. The
1924algorithm does not work by fully expanding a macro invocation into
1925some kind of token list, and then returning them one by one.
1926
1927Our expansion state is recorded in a context stack. We start out with
1928a single context on the stack, let's call it base context. This
1929consists of the token list returned by lex_line that forms the next
1930logical line in the source file.
1931
1932The current level in the context stack is stored in the cur_context
1933member of the cpp_reader structure. The context it references keeps,
1934amongst other things, a count of how many tokens form that context and
1935our position within those tokens.
1936
1937Fundamentally, calling cpp_get_token will return the next token from
1938the current context. If we're at the end of the current context, that
1939context is popped from the stack first, unless it is the base context,
1940in which case the next logical line is lexed from the source file.
1941
1942However, before returning the token, if it is a CPP_NAME token
1943_cpp_get_token checks to see if it is a macro and if it is enabled.
1944Each time it encounters a macro name, it calls push_macro_context.
1945This function checks that the macro should be expanded (with
1946is_macro_enabled), and if so pushes a new macro context on the stack
1947which becomes the current context. It then loops back to read the
1948first token of the macro context.
1949
1950A macro context basically consists of the token list representing the
1951macro's replacement list, which was saved in the hash table by
1952save_macro_expansion when its #define statement was parsed. If the
1953macro is function-like, it also contains the tokens that form the
1954arguments to the macro. I say more about macro arguments below, but
1955for now just saying that each argument is a set of pointers to tokens
1956is enough.
1957
1958When taking tokens from a macro context, we may get a CPP_MACRO_ARG
1959token. This represents an argument passed to the macro, with the
1960argument number stored in the token's AUX field. The argument should
1961be substituted; this is achieved by pushing an "argument context". An
1962argument context just refers to the tokens forming the argument,
1963which are obtained directly from the macro context. The STRINGIFY
1964flag on a CPP_MACRO_ARG token indicates that the argument should be
1965stringified.
1966
1967Here's a few simple rules the context stack obeys:-
1968
1969 1) The lex_line token list is always context zero.
1970
1971 2) Context 1, if it exists, must be a macro context.
1972
1973 3) An argument context can only appear above a macro context.
1974
1975 4) A macro context can appear above the base context, another macro
1976 context, or an argument context.
1977
1978 5) These imply that the minimal level of an argument context is 2.
1979
1980The only tricky thing left is ensuring that macros are enabled and
1981disabled correctly. The algorithm controls macro expansion by the
1982level of the context a token is taken from in the context stack. If a
1983token is taken from a level equal to no_expand_level (a member of
1984struct cpp_reader), no expansion is performed.
1985
1986When popping a context off the stack, if no_expand_level equals the
1987level of the popped context, it is reduced by one to match the new
1988context level, so that expansion is still disabled. It does not
1989increase if a context is pushed, though. It starts out life as
1990UINT_MAX, which has the effect that initially macro expansion is
1991enabled. I explain how this mechanism works below.
1992
1993The standard requires:-
1994
1995 1) Arguments to be fully expanded before substitution.
1996
1997 2) Stringified arguments to not be expanded, nor the tokens
1998 immediately surrounding a ## operator.
1999
2000 3) Continual rescanning until there are no more macros left to
2001 replace.
2002
2003 4) Once a macro has been expanded in stage 1) or 3), it cannot be
2004 expanded again during later rescans. This prevents infinite
2005 recursion.
2006
2007The first thing to observe is that stage 3) is mostly redundant.
2008Since a macro is disabled once it has been expanded, how can a rescan
2009find an unexpanded macro name? There are only two cases where this is
2010possible:-
2011
2012 a) If the macro name results from a token paste operation.
2013
2014 b) If the macro in question is a function-like macro that hasn't
2015 already been expanded because previously there was not the required
2016 '(' token immediately following it. This is only possible when an
2017 argument is substituted, and after substitution the last token of
2018 the argument can bind with a parenthesis appearing in the tokens
2019 following the substitution. Note that if the '(' appears within the
2020 argument, the ')' must too, as expanding macro arguments cannot
2021 "suck in" tokens outside the argument.
2022
2023So we tackle this as follows. When parsing the macro invocation for
2024arguments, we record the tokens forming each argument as a list of
2025pointers to those tokens. We do not expand any tokens that are "raw",
2026i.e. directly from the macro invocation, but other tokens that come
2027from (nested) argument substitution are fully expanded.
2028
2029This is achieved by setting the no_expand_level to that of the macro
2030invocation. A CPP_MACRO_ARG token never appears in the list of tokens
2031forming an argument, because parse_args (indirectly) calls
2032get_raw_token which automatically pushes argument contexts and traces
2033into them. Since these contexts are at a higher level than the
2034no_expand_level, they get fully macro expanded.
2035
2036"Raw" and non-raw tokens are separated in arguments by null pointers,
2037with the policy that the initial state of an argument is raw. If the
2038first token is not raw, it should be preceded by a null pointer. When
2039tracing through the tokens of an argument context, each time
2040get_raw_token encounters a null pointer, it toggles the flag
2041CONTEXT_RAW.
2042
2043This flag, when set, indicates to is_macro_disabled that we are
2044reading raw tokens which should be macro-expanded. Similarly, if
2045clear, is_macro_disabled suppresses re-expansion.
2046
2047It's probably time for an example.
2048
2049#define hash #
2050#define str(x) #x
2051#define xstr(y) str(y hash)
2052str(hash) // "hash"
2053xstr(hash) // "# hash"
2054
2055In the invocation of str, parse_args turns off macro expansion and so
2056parses the argument as <hash>. This is the only token (pointer)
2057passed as the argument to str. Since <hash> is raw there is no need
2058for an initial null pointer. stringify_arg is called from
2059get_raw_token when tracing through the expansion of str, since the
2060argument has the STRINGIFY flag set. stringify_arg turns off
2061macro_expansion by setting the no_expand_level to that of the argument
2062context. Thus it gets the token <hash> and stringifies it to "hash"
2063correctly.
2064
2065Similarly, xstr is passed <hash>. However, when parse_args is parsing
2066the invocation of str() in xstr's expansion, get_raw_token encounters
2067a CPP_MACRO_ARG token for y. Transparently to parse_args, it pushes
2068an argument context, and enters the tokens of the argument,
2069i.e. <hash>. This is at a higher context level than parse_args
2070disabled, and so is_macro_disabled permits expansion of it and a macro
2071context is pushed on top of the argument context. This contains the
2072<#> token, and the end result is that <hash> is macro expanded.
2073However, after popping off the argument context, the <hash> of xstr's
2074expansion does not get macro expanded because we're back at the
2075no_expand_level. The end result is that the argument passed to str is
2076<NULL> <#> <NULL> <hash>. Note the nulls - policy is we start off
2077raw, <#> is not raw, but then <hash> is.
2078
2079*/
d6d5f795 2080
041c3194
ZW
2081
2082/* Free the storage allocated for macro arguments. */
2083static void
2084free_macro_args (args)
2085 macro_args *args;
c5a04734 2086{
041c3194 2087 if (args->tokens)
417f3e3a 2088 free ((PTR) args->tokens);
041c3194
ZW
2089 free (args->ends);
2090 free (args);
c5a04734
ZW
2091}
2092
041c3194
ZW
2093/* Determines if a macro has been already used (and is therefore
2094 disabled). */
c5a04734 2095static int
041c3194 2096is_macro_disabled (pfile, expansion, token)
c5a04734 2097 cpp_reader *pfile;
041c3194
ZW
2098 const cpp_toklist *expansion;
2099 const cpp_token *token;
c5a04734 2100{
041c3194
ZW
2101 cpp_context *context = CURRENT_CONTEXT (pfile);
2102
2103 /* Arguments on either side of ## are inserted in place without
2104 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
2105 occurs during a later rescan pass. The effect is that we expand
2106 iff we would as part of the macro's expansion list, so we should
2107 drop to the macro's context. */
2108 if (IS_ARG_CONTEXT (context))
c5a04734 2109 {
041c3194
ZW
2110 if (token->flags & PASTED)
2111 context--;
2112 else if (!(context->flags & CONTEXT_RAW))
2113 return 1;
2114 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2115 context--;
c5a04734 2116 }
c5a04734 2117
041c3194
ZW
2118 /* Have we already used this macro? */
2119 while (context->level > 0)
c5a04734 2120 {
041c3194
ZW
2121 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2122 return 1;
2123 /* Raw argument tokens are judged based on the token list they
2124 came from. */
2125 if (context->flags & CONTEXT_RAW)
2126 context = pfile->contexts + context->level;
2127 else
2128 context--;
2129 }
c5a04734 2130
041c3194
ZW
2131 /* Function-like macros may be disabled if the '(' is not in the
2132 current context. We check this without disrupting the context
2133 stack. */
2134 if (expansion->paramc >= 0)
2135 {
2136 const cpp_token *next;
2137 unsigned int prev_nme;
c5a04734 2138
041c3194
ZW
2139 context = CURRENT_CONTEXT (pfile);
2140 /* Drop down any contexts we're at the end of: the '(' may
2141 appear in lower macro expansions, or in the rest of the file. */
2142 while (context->posn == context->count && context > pfile->contexts)
2143 {
2144 context--;
2145 /* If we matched, we are disabled, as we appear in the
2146 expansion of each macro we meet. */
2147 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2148 return 1;
2149 }
c5a04734 2150
041c3194
ZW
2151 prev_nme = pfile->no_expand_level;
2152 pfile->no_expand_level = context - pfile->contexts;
417f3e3a 2153 next = _cpp_get_token (pfile);
041c3194
ZW
2154 restore_macro_expansion (pfile, prev_nme);
2155 if (next->type != CPP_OPEN_PAREN)
2156 {
2157 _cpp_push_token (pfile, next);
b9bf5af8 2158 if (CPP_WTRADITIONAL (pfile))
041c3194 2159 cpp_warning (pfile,
92936ecf
ZW
2160 "function macro %s must be used with arguments in traditional C",
2161 token->val.node->name);
041c3194
ZW
2162 return 1;
2163 }
c5a04734 2164 }
c5a04734 2165
041c3194 2166 return 0;
c5a04734
ZW
2167}
2168
041c3194
ZW
2169/* Add a token to the set of tokens forming the arguments to the macro
2170 being parsed in parse_args. */
2171static void
2172save_token (args, token)
2173 macro_args *args;
2174 const cpp_token *token;
c5a04734 2175{
041c3194
ZW
2176 if (args->used == args->capacity)
2177 {
2178 args->capacity += args->capacity + 100;
2179 args->tokens = (const cpp_token **)
417f3e3a
ZW
2180 xrealloc ((PTR) args->tokens,
2181 args->capacity * sizeof (const cpp_token *));
041c3194
ZW
2182 }
2183 args->tokens[args->used++] = token;
c5a04734
ZW
2184}
2185
041c3194
ZW
2186/* Take and save raw tokens until we finish one argument. Empty
2187 arguments are saved as a single CPP_PLACEMARKER token. */
2188static const cpp_token *
2189parse_arg (pfile, var_args, paren_context, args, pcount)
c5a04734 2190 cpp_reader *pfile;
041c3194
ZW
2191 int var_args;
2192 unsigned int paren_context;
2193 macro_args *args;
2194 unsigned int *pcount;
c5a04734 2195{
041c3194
ZW
2196 const cpp_token *token;
2197 unsigned int paren = 0, count = 0;
2198 int raw, was_raw = 1;
c5a04734 2199
041c3194 2200 for (count = 0;; count++)
c5a04734 2201 {
417f3e3a 2202 token = _cpp_get_token (pfile);
c5a04734 2203
041c3194 2204 switch (token->type)
c5a04734 2205 {
041c3194
ZW
2206 default:
2207 break;
c5a04734 2208
041c3194
ZW
2209 case CPP_OPEN_PAREN:
2210 paren++;
2211 break;
2212
2213 case CPP_CLOSE_PAREN:
2214 if (paren-- != 0)
2215 break;
2216 goto out;
2217
2218 case CPP_COMMA:
2219 /* Commas are not terminators within parantheses or var_args. */
2220 if (paren || var_args)
2221 break;
2222 goto out;
2223
2224 case CPP_EOF: /* Error reported by caller. */
2225 goto out;
c5a04734 2226 }
c5a04734 2227
041c3194
ZW
2228 raw = pfile->cur_context <= paren_context;
2229 if (raw != was_raw)
2230 {
2231 was_raw = raw;
2232 save_token (args, 0);
2233 count++;
c5a04734 2234 }
041c3194 2235 save_token (args, token);
c5a04734 2236 }
c5a04734
ZW
2237
2238 out:
041c3194
ZW
2239 if (count == 0)
2240 {
2241 /* Duplicate the placemarker. Then we can set its flags and
2242 position and safely be using more than one. */
2243 save_token (args, duplicate_token (pfile, &placemarker_token));
2244 count++;
2245 }
2246
2247 *pcount = count;
2248 return token;
c5a04734
ZW
2249}
2250
041c3194
ZW
/* Non-zero if the argument beginning at offset O of argument list A
   is empty: either its first slot holds a CPP_PLACEMARKER token, or
   its first slot is a null pointer and the slot after it holds a
   CPP_PLACEMARKER.  A and O are evaluated more than once.  */

#define empty_argument(A, O) \
  ((A)->tokens[(O) + ((A)->tokens[(O)] == 0)]->type == CPP_PLACEMARKER)
2258
2259/* Parse the arguments making up a macro invocation. Nested arguments
2260 are automatically macro expanded, but immediate macros are not
2261 expanded; this enables e.g. operator # to work correctly. Returns
2262 non-zero on error. */
c5a04734 2263static int
041c3194 2264parse_args (pfile, hp, args)
c5a04734 2265 cpp_reader *pfile;
041c3194
ZW
2266 cpp_hashnode *hp;
2267 macro_args *args;
c5a04734 2268{
041c3194
ZW
2269 const cpp_token *token;
2270 const cpp_toklist *macro;
2271 unsigned int total = 0;
2272 unsigned int paren_context = pfile->cur_context;
2273 int argc = 0;
2274
2275 macro = hp->value.expansion;
2276 do
c5a04734 2277 {
041c3194 2278 unsigned int count;
c5a04734 2279
041c3194
ZW
2280 token = parse_arg (pfile, (argc + 1 == macro->paramc
2281 && (macro->flags & VAR_ARGS)),
2282 paren_context, args, &count);
2283 if (argc < macro->paramc)
c5a04734 2284 {
041c3194
ZW
2285 total += count;
2286 args->ends[argc] = total;
c5a04734 2287 }
041c3194 2288 argc++;
c5a04734 2289 }
041c3194 2290 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
c5a04734 2291
041c3194
ZW
2292 if (token->type == CPP_EOF)
2293 {
92936ecf 2294 cpp_error(pfile, "unterminated argument list for macro \"%s\"", hp->name);
041c3194
ZW
2295 return 1;
2296 }
2297 else if (argc < macro->paramc)
2298 {
2299 /* A rest argument is allowed to not appear in the invocation at all.
2300 e.g. #define debug(format, args...) ...
2301 debug("string");
2302 This is exactly the same as if the rest argument had received no
563dd08a 2303 tokens - debug("string",); This extension is deprecated. */
6fee6033
ZW
2304
2305 if (argc + 1 == macro->paramc && (macro->flags & VAR_ARGS))
041c3194
ZW
2306 {
2307 /* Duplicate the placemarker. Then we can set its flags and
2308 position and safely be using more than one. */
6fee6033
ZW
2309 cpp_token *pm = duplicate_token (pfile, &placemarker_token);
2310 pm->flags = VOID_REST;
2311 save_token (args, pm);
041c3194 2312 args->ends[argc] = total + 1;
6fee6033
ZW
2313
2314 if (CPP_OPTION (pfile, c99) && CPP_PEDANTIC (pfile))
2315 cpp_pedwarn (pfile, "ISO C99 requires rest arguments to be used");
2316
041c3194
ZW
2317 return 0;
2318 }
2319 else
2320 {
92936ecf 2321 cpp_error (pfile, "not enough arguments for macro \"%s\"", hp->name);
041c3194
ZW
2322 return 1;
2323 }
2324 }
2325 /* An empty argument to an empty function-like macro is fine. */
2326 else if (argc > macro->paramc
2327 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2328 {
92936ecf 2329 cpp_error (pfile, "too many arguments for macro \"%s\"", hp->name);
041c3194
ZW
2330 return 1;
2331 }
c5a04734 2332
041c3194
ZW
2333 return 0;
2334}
2335
2336/* Adds backslashes before all backslashes and double quotes appearing
2337 in strings. Non-printable characters are converted to octal. */
2338static U_CHAR *
2339quote_string (dest, src, len)
2340 U_CHAR *dest;
2341 const U_CHAR *src;
2342 unsigned int len;
2343{
2344 while (len--)
c5a04734 2345 {
041c3194 2346 U_CHAR c = *src++;
c5a04734 2347
041c3194 2348 if (c == '\\' || c == '"')
6ab3e7dd 2349 {
041c3194
ZW
2350 *dest++ = '\\';
2351 *dest++ = c;
2352 }
2353 else
2354 {
2355 if (ISPRINT (c))
2356 *dest++ = c;
2357 else
2358 {
2359 sprintf ((char *) dest, "\\%03o", c);
2360 dest += 4;
2361 }
6ab3e7dd 2362 }
c5a04734 2363 }
c5a04734 2364
041c3194 2365 return dest;
c5a04734
ZW
2366}
2367
041c3194
ZW
2368/* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2369 CPP_STRING token containing TEXT in quoted form. */
2370static cpp_token *
2371make_string_token (token, text, len)
2372 cpp_token *token;
2373 const U_CHAR *text;
2374 unsigned int len;
2375{
2376 U_CHAR *buf;
2377
2378 buf = (U_CHAR *) xmalloc (len * 4);
2379 token->type = CPP_STRING;
2380 token->flags = 0;
bfb9dc7f
ZW
2381 token->val.str.text = buf;
2382 token->val.str.len = quote_string (buf, text, len) - buf;
041c3194
ZW
2383 return token;
2384}
2385
2386/* Allocates and converts a temporary token to a CPP_NUMBER token,
2387 evaluating to NUMBER. */
2388static cpp_token *
2389alloc_number_token (pfile, number)
c5a04734 2390 cpp_reader *pfile;
041c3194 2391 int number;
c5a04734 2392{
041c3194
ZW
2393 cpp_token *result;
2394 char *buf;
2395
2396 result = get_temp_token (pfile);
2397 buf = xmalloc (20);
2398 sprintf (buf, "%d", number);
2399
2400 result->type = CPP_NUMBER;
2401 result->flags = 0;
bfb9dc7f
ZW
2402 result->val.str.text = (U_CHAR *) buf;
2403 result->val.str.len = strlen (buf);
041c3194
ZW
2404 return result;
2405}
c5a04734 2406
041c3194
ZW
2407/* Returns a temporary token from the temporary token store of PFILE. */
2408static cpp_token *
2409get_temp_token (pfile)
2410 cpp_reader *pfile;
2411{
2412 if (pfile->temp_used == pfile->temp_alloced)
2413 {
2414 if (pfile->temp_used == pfile->temp_cap)
2415 {
2416 pfile->temp_cap += pfile->temp_cap + 20;
2417 pfile->temp_tokens = (cpp_token **) xrealloc
2418 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2419 }
2420 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2421 (sizeof (cpp_token));
2422 }
c5a04734 2423
041c3194
ZW
2424 return pfile->temp_tokens[pfile->temp_used++];
2425}
2426
2427/* Release (not free) for re-use the temporary tokens of PFILE. */
2428static void
2429release_temp_tokens (pfile)
2430 cpp_reader *pfile;
2431{
2432 while (pfile->temp_used)
c5a04734 2433 {
041c3194 2434 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
c5a04734 2435
96be6998 2436 if (TOKEN_SPELL (token) == SPELL_STRING)
c5a04734 2437 {
bfb9dc7f
ZW
2438 free ((char *) token->val.str.text);
2439 token->val.str.text = 0;
c5a04734
ZW
2440 }
2441 }
041c3194 2442}
c5a04734 2443
041c3194
ZW
2444/* Free all of PFILE's dynamically-allocated temporary tokens. */
2445void
2446_cpp_free_temp_tokens (pfile)
2447 cpp_reader *pfile;
2448{
2449 if (pfile->temp_tokens)
c5a04734 2450 {
041c3194
ZW
2451 /* It is possible, though unlikely (looking for '(' of a funlike
2452 macro into EOF), that we haven't released the tokens yet. */
2453 release_temp_tokens (pfile);
2454 while (pfile->temp_alloced)
2455 free (pfile->temp_tokens[--pfile->temp_alloced]);
2456 free (pfile->temp_tokens);
c5a04734
ZW
2457 }
2458
041c3194
ZW
2459 if (pfile->date)
2460 {
bfb9dc7f 2461 free ((char *) pfile->date->val.str.text);
041c3194 2462 free (pfile->date);
bfb9dc7f 2463 free ((char *) pfile->time->val.str.text);
041c3194
ZW
2464 free (pfile->time);
2465 }
c5a04734
ZW
2466}
2467
041c3194
ZW
2468/* Copy TOKEN into a temporary token from PFILE's store. */
2469static cpp_token *
2470duplicate_token (pfile, token)
2471 cpp_reader *pfile;
2472 const cpp_token *token;
2473{
2474 cpp_token *result = get_temp_token (pfile);
c5a04734 2475
041c3194 2476 *result = *token;
96be6998 2477 if (TOKEN_SPELL (token) == SPELL_STRING)
041c3194 2478 {
bfb9dc7f
ZW
2479 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2480 memcpy (buff, token->val.str.text, token->val.str.len);
2481 result->val.str.text = buff;
041c3194
ZW
2482 }
2483 return result;
2484}
c5a04734 2485
041c3194
ZW
2486/* Determine whether two tokens can be pasted together, and if so,
2487 what the resulting token is. Returns CPP_EOF if the tokens cannot
2488 be pasted, or the appropriate type for the merged token if they
2489 can. */
2490static enum cpp_ttype
2491can_paste (pfile, token1, token2, digraph)
2492 cpp_reader * pfile;
2493 const cpp_token *token1, *token2;
2494 int* digraph;
c5a04734 2495{
041c3194
ZW
2496 enum cpp_ttype a = token1->type, b = token2->type;
2497 int cxx = CPP_OPTION (pfile, cplusplus);
c5a04734 2498
92936ecf
ZW
2499 /* Treat named operators as if they were ordinary NAMEs. */
2500 if (token1->flags & NAMED_OP)
2501 a = CPP_NAME;
2502 if (token2->flags & NAMED_OP)
2503 b = CPP_NAME;
2504
041c3194
ZW
2505 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2506 return a + (CPP_EQ_EQ - CPP_EQ);
c5a04734 2507
041c3194 2508 switch (a)
c5a04734 2509 {
041c3194
ZW
2510 case CPP_GREATER:
2511 if (b == a) return CPP_RSHIFT;
2512 if (b == CPP_QUERY && cxx) return CPP_MAX;
2513 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
2514 break;
2515 case CPP_LESS:
2516 if (b == a) return CPP_LSHIFT;
2517 if (b == CPP_QUERY && cxx) return CPP_MIN;
2518 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
9b55f29a
NB
2519 if (CPP_OPTION (pfile, digraphs))
2520 {
2521 if (b == CPP_COLON)
2522 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2523 if (b == CPP_MOD)
2524 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
2525 }
041c3194 2526 break;
c5a04734 2527
041c3194
ZW
2528 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2529 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2530 case CPP_OR: if (b == a) return CPP_OR_OR; break;
c5a04734 2531
041c3194
ZW
2532 case CPP_MINUS:
2533 if (b == a) return CPP_MINUS_MINUS;
2534 if (b == CPP_GREATER) return CPP_DEREF;
2535 break;
2536 case CPP_COLON:
2537 if (b == a && cxx) return CPP_SCOPE;
9b55f29a 2538 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
041c3194
ZW
2539 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2540 break;
2541
2542 case CPP_MOD:
9b55f29a
NB
2543 if (CPP_OPTION (pfile, digraphs))
2544 {
2545 if (b == CPP_GREATER)
2546 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2547 if (b == CPP_COLON)
2548 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2549 }
041c3194
ZW
2550 break;
2551 case CPP_DEREF:
2552 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2553 break;
2554 case CPP_DOT:
2555 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2556 if (b == CPP_NUMBER) return CPP_NUMBER;
2557 break;
2558
2559 case CPP_HASH:
2560 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2561 /* %:%: digraph */
2562 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2563 break;
2564
2565 case CPP_NAME:
2566 if (b == CPP_NAME) return CPP_NAME;
2567 if (b == CPP_NUMBER
bfb9dc7f 2568 && is_numstart(token2->val.str.text[0])) return CPP_NAME;
041c3194 2569 if (b == CPP_CHAR
bfb9dc7f 2570 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
041c3194 2571 if (b == CPP_STRING
bfb9dc7f 2572 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
041c3194
ZW
2573 break;
2574
2575 case CPP_NUMBER:
2576 if (b == CPP_NUMBER) return CPP_NUMBER;
2577 if (b == CPP_NAME) return CPP_NUMBER;
2578 if (b == CPP_DOT) return CPP_NUMBER;
2579 /* Numbers cannot have length zero, so this is safe. */
2580 if ((b == CPP_PLUS || b == CPP_MINUS)
bfb9dc7f 2581 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
041c3194
ZW
2582 return CPP_NUMBER;
2583 break;
2584
ba89d661
ZW
2585 case CPP_OTHER:
2586 if (CPP_OPTION (pfile, objc) && token1->val.aux == '@')
2587 {
2588 if (b == CPP_NAME) return CPP_NAME;
2589 if (b == CPP_STRING) return CPP_OSTRING;
2590 }
2591
041c3194
ZW
2592 default:
2593 break;
c5a04734
ZW
2594 }
2595
041c3194
ZW
2596 return CPP_EOF;
2597}
2598
2599/* Check if TOKEN is to be ##-pasted with the token after it. */
2600static const cpp_token *
2601maybe_paste_with_next (pfile, token)
2602 cpp_reader *pfile;
2603 const cpp_token *token;
2604{
2605 cpp_token *pasted;
2606 const cpp_token *second;
2607 cpp_context *context = CURRENT_CONTEXT (pfile);
2608
2609 /* Is this token on the LHS of ## ? */
041c3194 2610
417f3e3a
ZW
2611 while ((token->flags & PASTE_LEFT)
2612 || ((context->flags & CONTEXT_PASTEL)
2613 && context->posn == context->count))
c5a04734 2614 {
417f3e3a
ZW
2615 /* Suppress macro expansion for next token, but don't conflict
2616 with the other method of suppression. If it is an argument,
2617 macro expansion within the argument will still occur. */
2618 pfile->paste_level = pfile->cur_context;
2619 second = _cpp_get_token (pfile);
2620 pfile->paste_level = 0;
2621
2622 /* Ignore placemarker argument tokens (cannot be from an empty
2623 macro since macros are not expanded). */
2624 if (token->type == CPP_PLACEMARKER)
2625 pasted = duplicate_token (pfile, second);
2626 else if (second->type == CPP_PLACEMARKER)
041c3194 2627 {
5ef865d5
ZW
2628 /* GCC has special extended semantics for , ## b where b is
2629 a varargs parameter: the comma disappears if b was given
2630 no actual arguments (not merely if b is an empty
2631 argument). */
2632 if (token->type == CPP_COMMA && second->flags & VOID_REST)
6fee6033 2633 pasted = duplicate_token (pfile, second);
417f3e3a
ZW
2634 else
2635 pasted = duplicate_token (pfile, token);
041c3194
ZW
2636 }
2637 else
041c3194 2638 {
417f3e3a
ZW
2639 int digraph = 0;
2640 enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
c5a04734 2641
417f3e3a
ZW
2642 if (type == CPP_EOF)
2643 {
2644 if (CPP_OPTION (pfile, warn_paste))
5ef865d5
ZW
2645 {
2646 /* Do not complain about , ## <whatever> if
2647 <whatever> came from a variable argument, because
2648 the author probably intended the ## to trigger
2649 the special extended semantics (see above). */
2650 if (token->type == CPP_COMMA
2651 && IS_ARG_CONTEXT (CURRENT_CONTEXT (pfile))
ff94c747 2652 && ON_REST_ARG (CURRENT_CONTEXT (pfile) - 1))
5ef865d5
ZW
2653 /* no warning */;
2654 else
2655 cpp_warning (pfile,
417f3e3a 2656 "pasting would not give a valid preprocessing token");
5ef865d5 2657 }
417f3e3a 2658 _cpp_push_token (pfile, second);
ff94c747
NB
2659 /* A short term hack to safely clear the PASTE_LEFT flag. */
2660 pasted = duplicate_token (pfile, token);
2661 pasted->flags &= ~PASTE_LEFT;
2662 return pasted;
417f3e3a 2663 }
c5a04734 2664
417f3e3a
ZW
2665 if (type == CPP_NAME || type == CPP_NUMBER)
2666 {
2667 /* Join spellings. */
2668 U_CHAR *buf, *end;
2669
2670 pasted = get_temp_token (pfile);
2671 buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2672 end = spell_token (pfile, token, buf);
2673 end = spell_token (pfile, second, end);
2674 *end = '\0';
041c3194 2675
417f3e3a
ZW
2676 if (type == CPP_NAME)
2677 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2678 else
2679 {
2680 pasted->val.str.text = uxstrdup (buf);
2681 pasted->val.str.len = end - buf;
2682 }
2683 }
ba89d661
ZW
2684 else if (type == CPP_WCHAR || type == CPP_WSTRING
2685 || type == CPP_OSTRING)
417f3e3a 2686 pasted = duplicate_token (pfile, second);
bfb9dc7f
ZW
2687 else
2688 {
417f3e3a
ZW
2689 pasted = get_temp_token (pfile);
2690 pasted->val.integer = 0;
bfb9dc7f 2691 }
417f3e3a
ZW
2692
2693 pasted->type = type;
2694 pasted->flags = digraph ? DIGRAPH : 0;
92936ecf
ZW
2695
2696 if (type == CPP_NAME && pasted->val.node->type == T_OPERATOR)
2697 {
2698 pasted->type = pasted->val.node->value.code;
2699 pasted->flags |= NAMED_OP;
2700 }
041c3194
ZW
2701 }
2702
417f3e3a
ZW
2703 /* The pasted token gets the whitespace flags and position of the
2704 first token, the PASTE_LEFT flag of the second token, plus the
2705 PASTED flag to indicate it is the result of a paste. However, we
2706 want to preserve the DIGRAPH flag. */
2707 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2708 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2709 | (second->flags & PASTE_LEFT) | PASTED);
2710 pasted->col = token->col;
2711 pasted->line = token->line;
2712
2713 /* See if there is another token to be pasted onto the one we just
2714 constructed. */
2715 token = pasted;
2716 context = CURRENT_CONTEXT (pfile);
2717 /* and loop */
041c3194 2718 }
417f3e3a 2719 return token;
041c3194
ZW
2720}
2721
2722/* Convert a token sequence to a single string token according to the
2723 rules of the ISO C #-operator. */
2724#define INIT_SIZE 200
2725static cpp_token *
2726stringify_arg (pfile, token)
c5a04734 2727 cpp_reader *pfile;
041c3194 2728 const cpp_token *token;
c5a04734 2729{
041c3194
ZW
2730 cpp_token *result;
2731 unsigned char *main_buf;
2732 unsigned int prev_value, backslash_count = 0;
2733 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
c5a04734 2734
417f3e3a 2735 push_arg_context (pfile, token);
041c3194
ZW
2736 prev_value = prevent_macro_expansion (pfile);
2737 main_buf = (unsigned char *) xmalloc (buf_cap);
c5a04734 2738
041c3194
ZW
2739 result = get_temp_token (pfile);
2740 ASSIGN_FLAGS_AND_POS (result, token);
2741
417f3e3a 2742 for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
c5a04734 2743 {
041c3194
ZW
2744 int escape;
2745 unsigned char *buf;
2746 unsigned int len = TOKEN_LEN (token);
c5a04734 2747
96be6998
ZW
2748 if (token->type == CPP_PLACEMARKER)
2749 continue;
2750
041c3194
ZW
2751 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2752 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2753 if (escape)
2754 len *= 4 + 1;
2755
2756 if (buf_used + len > buf_cap)
c5a04734 2757 {
041c3194
ZW
2758 buf_cap = buf_used + len + INIT_SIZE;
2759 main_buf = xrealloc (main_buf, buf_cap);
2760 }
c5a04734 2761
041c3194
ZW
2762 if (whitespace && (token->flags & PREV_WHITE))
2763 main_buf[buf_used++] = ' ';
c5a04734 2764
041c3194
ZW
2765 if (escape)
2766 buf = (unsigned char *) xmalloc (len);
2767 else
2768 buf = main_buf + buf_used;
2769
2770 len = spell_token (pfile, token, buf) - buf;
2771 if (escape)
2772 {
2773 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2774 free (buf);
2775 }
2776 else
2777 buf_used += len;
c5a04734 2778
041c3194
ZW
2779 whitespace = 1;
2780 if (token->type == CPP_BACKSLASH)
2781 backslash_count++;
2782 else
2783 backslash_count = 0;
2784 }
c5a04734 2785
041c3194
ZW
2786 /* Ignore the final \ of invalid string literals. */
2787 if (backslash_count & 1)
2788 {
2789 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2790 buf_used--;
2791 }
c5a04734 2792
041c3194 2793 result->type = CPP_STRING;
bfb9dc7f
ZW
2794 result->val.str.text = main_buf;
2795 result->val.str.len = buf_used;
041c3194
ZW
2796 restore_macro_expansion (pfile, prev_value);
2797 return result;
2798}
c5a04734 2799
041c3194
ZW
2800/* Allocate more room on the context stack of PFILE. */
2801static void
2802expand_context_stack (pfile)
2803 cpp_reader *pfile;
2804{
2805 pfile->context_cap += pfile->context_cap + 20;
2806 pfile->contexts = (cpp_context *)
2807 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2808}
c5a04734 2809
041c3194
ZW
2810/* Push the context of macro NODE onto the context stack. TOKEN is
2811 the CPP_NAME token invoking the macro. */
417f3e3a
ZW
2812static int
2813push_macro_context (pfile, token)
041c3194 2814 cpp_reader *pfile;
041c3194
ZW
2815 const cpp_token *token;
2816{
2817 unsigned char orig_flags;
2818 macro_args *args;
2819 cpp_context *context;
417f3e3a 2820 cpp_hashnode *node = token->val.node;
c5a04734 2821
041c3194
ZW
2822 /* Token's flags may change when parsing args containing a nested
2823 invocation of this macro. */
2824 orig_flags = token->flags & (PREV_WHITE | BOL);
2825 args = 0;
2826 if (node->value.expansion->paramc >= 0)
c5a04734 2827 {
041c3194
ZW
2828 unsigned int error, prev_nme;
2829
2830 /* Allocate room for the argument contexts, and parse them. */
2831 args = (macro_args *) xmalloc (sizeof (macro_args));
2832 args->ends = (unsigned int *)
2833 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2834 args->tokens = 0;
2835 args->capacity = 0;
2836 args->used = 0;
041c3194
ZW
2837
2838 prev_nme = prevent_macro_expansion (pfile);
2839 pfile->args = args;
2840 error = parse_args (pfile, node, args);
2841 pfile->args = 0;
2842 restore_macro_expansion (pfile, prev_nme);
2843 if (error)
2844 {
2845 free_macro_args (args);
417f3e3a 2846 return 1;
041c3194 2847 }
08ebc1c5
NB
2848 /* Set the level after the call to parse_args. */
2849 args->level = pfile->cur_context;
c5a04734 2850 }
c5a04734 2851
041c3194
ZW
2852 /* Now push its context. */
2853 pfile->cur_context++;
2854 if (pfile->cur_context == pfile->context_cap)
2855 expand_context_stack (pfile);
2856
2857 context = CURRENT_CONTEXT (pfile);
2858 context->u.list = node->value.expansion;
2859 context->args = args;
2860 context->posn = 0;
2861 context->count = context->u.list->tokens_used;
2862 context->level = pfile->cur_context;
2863 context->flags = 0;
2864 context->pushed_token = 0;
2865
2866 /* Set the flags of the first token. We know there must
2867 be one, empty macros are a single placemarker token. */
2868 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2869
417f3e3a 2870 return 0;
c5a04734
ZW
2871}
2872
041c3194
ZW
2873/* Push an argument to the current macro onto the context stack.
2874 TOKEN is the MACRO_ARG token representing the argument expansion. */
417f3e3a 2875static void
041c3194
ZW
2876push_arg_context (pfile, token)
2877 cpp_reader *pfile;
2878 const cpp_token *token;
c5a04734 2879{
041c3194
ZW
2880 cpp_context *context;
2881 macro_args *args;
2882
2883 pfile->cur_context++;
2884 if (pfile->cur_context == pfile->context_cap)
2885 expand_context_stack (pfile);
2886
2887 context = CURRENT_CONTEXT (pfile);
2888 args = context[-1].args;
2889
2890 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2891 context->u.arg = args->tokens + context->count;
2892 context->count = args->ends[token->val.aux] - context->count;
2893 context->args = 0;
2894 context->posn = 0;
2895 context->level = args->level;
2896 context->flags = CONTEXT_ARG | CONTEXT_RAW;
2897 context->pushed_token = 0;
2898
2899 /* Set the flags of the first token. There is one. */
2900 {
2901 const cpp_token *first = context->u.arg[0];
2902 if (!first)
2903 first = context->u.arg[1];
c5a04734 2904
041c3194
ZW
2905 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2906 token->flags & (PREV_WHITE | BOL));
2907 }
c5a04734 2908
041c3194
ZW
2909 if (token->flags & PASTE_LEFT)
2910 context->flags |= CONTEXT_PASTEL;
2911 if (pfile->paste_level)
2912 context->flags |= CONTEXT_PASTER;
c5a04734
ZW
2913}
2914
041c3194
ZW
2915/* "Unget" a token. It is effectively inserted in the token queue and
2916 will be returned by the next call to get_raw_token. */
c5a04734 2917void
041c3194 2918_cpp_push_token (pfile, token)
c5a04734 2919 cpp_reader *pfile;
041c3194 2920 const cpp_token *token;
c5a04734 2921{
041c3194 2922 cpp_context *context = CURRENT_CONTEXT (pfile);
96be6998
ZW
2923
2924 if (context->posn > 0)
2925 {
2926 const cpp_token *prev;
2927 if (IS_ARG_CONTEXT (context))
2928 prev = context->u.arg[context->posn - 1];
2929 else
2930 prev = &context->u.list->tokens[context->posn - 1];
2931
2932 if (prev == token)
2933 {
2934 context->posn--;
2935 return;
2936 }
2937 }
2938
041c3194
ZW
2939 if (context->pushed_token)
2940 cpp_ice (pfile, "two tokens pushed in a row");
2941 if (token->type != CPP_EOF)
2942 context->pushed_token = token;
2943 /* Don't push back a directive's CPP_EOF, step back instead. */
2944 else if (pfile->cur_context == 0)
2945 pfile->contexts[0].posn--;
2946}
c5a04734 2947
041c3194
ZW
2948/* Handle a preprocessing directive. TOKEN is the CPP_HASH token
2949 introducing the directive. */
2950static void
2951process_directive (pfile, token)
2952 cpp_reader *pfile;
2953 const cpp_token *token;
2954{
2955 const struct directive *d = pfile->token_list.directive;
2956 int prev_nme = 0;
2957
2958 /* Skip over the directive name. */
2959 if (token[1].type == CPP_NAME)
2960 _cpp_get_raw_token (pfile);
2961 else if (token[1].type != CPP_NUMBER)
96be6998 2962 cpp_ice (pfile, "directive begins with %s?!", TOKEN_NAME (token));
041c3194 2963
041c3194
ZW
2964 if (! (d->flags & EXPAND))
2965 prev_nme = prevent_macro_expansion (pfile);
2966 (void) (*d->handler) (pfile);
2967 if (! (d->flags & EXPAND))
2968 restore_macro_expansion (pfile, prev_nme);
2969 _cpp_skip_rest_of_line (pfile);
2970}
c5a04734 2971
041c3194
ZW
2972/* The external interface to return the next token. All macro
2973 expansion and directive processing is handled internally, the
2974 caller only ever sees the output after preprocessing. */
2975const cpp_token *
2976cpp_get_token (pfile)
2977 cpp_reader *pfile;
2978{
2979 const cpp_token *token;
417f3e3a 2980 /* Loop till we hit a non-directive, non-placemarker token. */
041c3194
ZW
2981 for (;;)
2982 {
417f3e3a
ZW
2983 token = _cpp_get_token (pfile);
2984
2985 if (token->type == CPP_PLACEMARKER)
2986 continue;
2987
2988 if (token->type == CPP_HASH && token->flags & BOL
041c3194 2989 && pfile->token_list.directive)
c5a04734 2990 {
041c3194
ZW
2991 process_directive (pfile, token);
2992 continue;
c5a04734
ZW
2993 }
2994
417f3e3a
ZW
2995 return token;
2996 }
2997}
2998
2999/* The internal interface to return the next token. There are two
3000 differences between the internal and external interfaces: the
3001 internal interface may return a PLACEMARKER token, and it does not
3002 process directives. */
3003const cpp_token *
3004_cpp_get_token (pfile)
3005 cpp_reader *pfile;
3006{
6ead1e99 3007 const cpp_token *token, *old_token;
417f3e3a
ZW
3008 cpp_hashnode *node;
3009
3010 /* Loop until we hit a non-macro token. */
3011 for (;;)
3012 {
3013 token = get_raw_token (pfile);
3014
041c3194
ZW
3015 /* Short circuit EOF. */
3016 if (token->type == CPP_EOF)
3017 return token;
417f3e3a
ZW
3018
3019 /* If we are skipping... */
3020 if (pfile->skipping)
c5a04734 3021 {
417f3e3a
ZW
3022 /* we still have to process directives, */
3023 if (pfile->token_list.directive)
3024 return token;
3025
3026 /* but everything else is ignored. */
041c3194
ZW
3027 _cpp_skip_rest_of_line (pfile);
3028 continue;
3029 }
c5a04734 3030
417f3e3a
ZW
3031 /* If there's a potential control macro and we get here, then that
3032 #ifndef didn't cover the entire file and its argument shouldn't
3033 be taken as a control macro. */
3034 pfile->potential_control_macro = 0;
c5a04734 3035
bbdac7d0
ZW
3036 /* If we are rescanning preprocessed input, no macro expansion or
3037 token pasting may occur. */
3038 if (CPP_OPTION (pfile, preprocessed))
3039 return token;
3040
6ead1e99
ZW
3041 old_token = token;
3042
417f3e3a
ZW
3043 /* See if there's a token to paste with this one. */
3044 if (!pfile->paste_level)
3045 token = maybe_paste_with_next (pfile, token);
c5a04734 3046
417f3e3a 3047 /* If it isn't a macro, return it now. */
92936ecf 3048 if (token->type != CPP_NAME || token->val.node->type == T_VOID)
417f3e3a 3049 return token;
c5a04734 3050
92936ecf 3051 /* Is macro expansion disabled in general, or are we in the
6ead1e99
ZW
3052 middle of a token paste, or was this token just pasted?
3053 (Note we don't check token->flags & PASTED, because that
3054 counts tokens that were pasted at some point in the past,
3055 we're only interested in tokens that were pasted by this call
3056 to maybe_paste_with_next.) */
3057 if (pfile->no_expand_level == pfile->cur_context
3058 || pfile->paste_level
3059 || (token != old_token
3060 && pfile->no_expand_level + 1 == pfile->cur_context))
417f3e3a 3061 return token;
6ead1e99 3062
417f3e3a
ZW
3063 node = token->val.node;
3064 if (node->type != T_MACRO)
3065 return special_symbol (pfile, node, token);
c5a04734 3066
041c3194
ZW
3067 if (is_macro_disabled (pfile, node->value.expansion, token))
3068 return token;
c5a04734 3069
417f3e3a
ZW
3070 if (push_macro_context (pfile, token))
3071 return token;
3072 /* else loop */
041c3194 3073 }
041c3194 3074}
c5a04734 3075
041c3194
ZW
3076/* Returns the next raw token, i.e. without performing macro
3077 expansion. Argument contexts are automatically entered. */
3078static const cpp_token *
3079get_raw_token (pfile)
3080 cpp_reader *pfile;
3081{
3082 const cpp_token *result;
417f3e3a 3083 cpp_context *context;
c5a04734 3084
417f3e3a 3085 for (;;)
041c3194 3086 {
417f3e3a
ZW
3087 context = CURRENT_CONTEXT (pfile);
3088 if (context->pushed_token)
041c3194 3089 {
417f3e3a
ZW
3090 result = context->pushed_token;
3091 context->pushed_token = 0;
6fee6033 3092 return result; /* Cannot be a CPP_MACRO_ARG */
417f3e3a
ZW
3093 }
3094 else if (context->posn == context->count)
3095 {
3096 if (pop_context (pfile))
3097 return &eof_token;
3098 continue;
3099 }
6fee6033 3100 else if (IS_ARG_CONTEXT (context))
417f3e3a 3101 {
6fee6033
ZW
3102 result = context->u.arg[context->posn++];
3103 if (result == 0)
c5a04734 3104 {
6fee6033 3105 context->flags ^= CONTEXT_RAW;
041c3194 3106 result = context->u.arg[context->posn++];
c5a04734 3107 }
6fee6033 3108 return result; /* Cannot be a CPP_MACRO_ARG */
041c3194 3109 }
c5a04734 3110
6fee6033
ZW
3111 result = &context->u.list->tokens[context->posn++];
3112
417f3e3a
ZW
3113 if (result->type != CPP_MACRO_ARG)
3114 return result;
3115
3116 if (result->flags & STRINGIFY_ARG)
3117 return stringify_arg (pfile, result);
3118
3119 push_arg_context (pfile, result);
3120 }
041c3194 3121}
c5a04734 3122
041c3194
ZW
3123/* Internal interface to get the token without macro expanding. */
3124const cpp_token *
3125_cpp_get_raw_token (pfile)
3126 cpp_reader *pfile;
3127{
3128 int prev_nme = prevent_macro_expansion (pfile);
417f3e3a 3129 const cpp_token *result = _cpp_get_token (pfile);
041c3194
ZW
3130 restore_macro_expansion (pfile, prev_nme);
3131 return result;
3132}
c5a04734 3133
041c3194
ZW
3134/* A thin wrapper to lex_line. CLEAR is non-zero if the current token
3135 list should be overwritten, or zero if we need to append
3136 (typically, if we are within the arguments to a macro, or looking
3137 for the '(' to start a function-like macro invocation). */
3138static int
3139lex_next (pfile, clear)
3140 cpp_reader *pfile;
3141 int clear;
3142{
3143 cpp_toklist *list = &pfile->token_list;
3144 const cpp_token *old_list = list->tokens;
3145 unsigned int old_used = list->tokens_used;
c5a04734 3146
041c3194 3147 if (clear)
c5a04734 3148 {
041c3194
ZW
3149 /* Release all temporary tokens. */
3150 _cpp_clear_toklist (list);
3151 pfile->contexts[0].posn = 0;
3152 if (pfile->temp_used)
3153 release_temp_tokens (pfile);
c5a04734 3154 }
041c3194
ZW
3155 lex_line (pfile, list);
3156 pfile->contexts[0].count = list->tokens_used;
c5a04734 3157
041c3194 3158 if (!clear && pfile->args)
c5a04734 3159 {
041c3194
ZW
3160 /* Fix up argument token pointers. */
3161 if (old_list != list->tokens)
3162 {
3163 unsigned int i;
3164
3165 for (i = 0; i < pfile->args->used; i++)
3166 {
3167 const cpp_token *token = pfile->args->tokens[i];
3168 if (token >= old_list && token < old_list + old_used)
3169 pfile->args->tokens[i] = (const cpp_token *)
3170 ((char *) token + ((char *) list->tokens - (char *) old_list));
3171 }
3172 }
3173
3174 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3175 tokens within the list of arguments that would otherwise act as
3176 preprocessing directives, the behavior is undefined.
3177
3178 This implementation will report a hard error and treat the
3179 'sequence of preprocessing tokens' as part of the macro argument,
3180 not a directive.
3181
3182 Note if pfile->args == 0, we're OK since we're only inside a
3183 macro argument after a '('. */
3184 if (list->directive)
3185 {
3186 cpp_error_with_line (pfile, list->tokens[old_used].line,
3187 list->tokens[old_used].col,
3188 "#%s may not be used inside a macro argument",
3189 list->directive->name);
91fcd158 3190 return 1;
041c3194 3191 }
c5a04734
ZW
3192 }
3193
041c3194 3194 return 0;
c5a04734
ZW
3195}
3196
417f3e3a
ZW
3197/* Pops a context off the context stack. If we're at the bottom, lexes
3198 the next logical line. Returns EOF if we're at the end of the
5ef865d5 3199 argument list to the # operator, or we should not "overflow"
041c3194
ZW
3200 into the rest of the file (e.g. 6.10.3.1.1). */
3201static int
417f3e3a 3202pop_context (pfile)
041c3194
ZW
3203 cpp_reader *pfile;
3204{
3205 cpp_context *context;
3fef5b2b 3206
041c3194 3207 if (pfile->cur_context == 0)
417f3e3a
ZW
3208 {
3209 /* If we are currently processing a directive, do not advance. 6.10
3210 paragraph 2: A new-line character ends the directive even if it
3211 occurs within what would otherwise be an invocation of a
3212 function-like macro. */
3213 if (pfile->token_list.directive)
3214 return 1;
3215
3216 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3217 }
041c3194
ZW
3218
3219 /* Argument contexts, when parsing args or handling # operator
3220 return CPP_EOF at the end. */
3221 context = CURRENT_CONTEXT (pfile);
3222 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3223 return 1;
3224
3225 /* Free resources when leaving macro contexts. */
3226 if (context->args)
3227 free_macro_args (context->args);
3228
3229 if (pfile->cur_context == pfile->no_expand_level)
3230 pfile->no_expand_level--;
3231 pfile->cur_context--;
3232
3233 return 0;
3234}
3235
041c3194
ZW
3236/* Turn off macro expansion at the current context level. */
3237static unsigned int
3238prevent_macro_expansion (pfile)
3239 cpp_reader *pfile;
3240{
3241 unsigned int prev_value = pfile->no_expand_level;
3242 pfile->no_expand_level = pfile->cur_context;
3243 return prev_value;
3244}
3245
3246/* Restore macro expansion to its previous state. */
3247static void
3248restore_macro_expansion (pfile, prev_value)
3249 cpp_reader *pfile;
3250 unsigned int prev_value;
3251{
3252 pfile->no_expand_level = prev_value;
3253}
3254
3255/* Used by cpperror.c to obtain the correct line and column to report
3256 in a diagnostic. */
3257unsigned int
3258_cpp_get_line (pfile, pcol)
3259 cpp_reader *pfile;
3260 unsigned int *pcol;
3261{
3262 unsigned int index;
3263 const cpp_token *cur_token;
3264
0d9f234d 3265 if (pfile->state.in_lex_line)
041c3194
ZW
3266 index = pfile->token_list.tokens_used;
3267 else
6ead1e99 3268 {
0d9f234d
NB
3269 index = pfile->contexts[0].posn;
3270
3271 if (index == 0)
3272 {
3273 if (pcol)
3274 *pcol = 0;
3275 return 0;
3276 }
3277 index--;
6ead1e99
ZW
3278 }
3279
0d9f234d 3280 cur_token = &pfile->token_list.tokens[index];
041c3194
ZW
3281 if (pcol)
3282 *pcol = cur_token->col;
3283 return cur_token->line;
3284}
3285
3286#define DSC(str) (const U_CHAR *)str, sizeof str - 1
3287static const char * const monthnames[] =
3288{
3289 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3290 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3291};
3292
3293/* Handle builtin macros like __FILE__. */
3294static const cpp_token *
3295special_symbol (pfile, node, token)
3296 cpp_reader *pfile;
3297 cpp_hashnode *node;
d1d9a6bd 3298 const cpp_token *token;
3fef5b2b 3299{
041c3194
ZW
3300 cpp_token *result;
3301 cpp_buffer *ip;
3fef5b2b 3302
041c3194 3303 switch (node->type)
3fef5b2b 3304 {
041c3194
ZW
3305 case T_FILE:
3306 case T_BASE_FILE:
3fef5b2b 3307 {
041c3194 3308 const char *file;
3fef5b2b 3309
041c3194
ZW
3310 ip = CPP_BUFFER (pfile);
3311 if (ip == 0)
3312 file = "";
3fef5b2b 3313 else
041c3194
ZW
3314 {
3315 if (node->type == T_BASE_FILE)
3316 while (CPP_PREV_BUFFER (ip) != NULL)
3317 ip = CPP_PREV_BUFFER (ip);
3318
3319 file = ip->nominal_fname;
3320 }
3321 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3322 strlen (file));
3323 }
3324 break;
3fef5b2b 3325
041c3194 3326 case T_INCLUDE_LEVEL:
f9a0e96c
ZW
3327 /* pfile->include_depth counts the primary source as level 1,
3328 but historically __INCLUDE_DEPTH__ has called the primary
3329 source level 0. */
3330 result = alloc_number_token (pfile, pfile->include_depth - 1);
3fef5b2b
NB
3331 break;
3332
041c3194
ZW
3333 case T_SPECLINE:
3334 /* If __LINE__ is embedded in a macro, it must expand to the
3335 line of the macro's invocation, not its definition.
3336 Otherwise things like assert() will not work properly. */
3337 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
5d7ee2fa
NB
3338 break;
3339
041c3194 3340 case T_STDC:
3fef5b2b 3341 {
041c3194 3342 int stdc = 1;
3fef5b2b 3343
041c3194
ZW
3344#ifdef STDC_0_IN_SYSTEM_HEADERS
3345 if (CPP_IN_SYSTEM_HEADER (pfile)
bfb9dc7f 3346 && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
041c3194
ZW
3347 stdc = 0;
3348#endif
3349 result = alloc_number_token (pfile, stdc);
3fef5b2b
NB
3350 }
3351 break;
3352
041c3194
ZW
3353 case T_DATE:
3354 case T_TIME:
3355 if (pfile->date == 0)
3356 {
3357 /* Allocate __DATE__ and __TIME__ from permanent storage,
3358 and save them in pfile so we don't have to do this again.
3359 We don't generate these strings at init time because
3360 time() and localtime() are very slow on some systems. */
3361 time_t tt = time (NULL);
3362 struct tm *tb = localtime (&tt);
3363
3364 pfile->date = make_string_token
3365 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3366 pfile->time = make_string_token
3367 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3368
bfb9dc7f 3369 sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
041c3194 3370 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
bfb9dc7f 3371 sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
041c3194
ZW
3372 tb->tm_hour, tb->tm_min, tb->tm_sec);
3373 }
3374 result = node->type == T_DATE ? pfile->date: pfile->time;
3fef5b2b
NB
3375 break;
3376
041c3194 3377 case T_POISON:
bfb9dc7f 3378 cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
041c3194
ZW
3379 return token;
3380
3381 default:
3382 cpp_ice (pfile, "invalid special hash type");
3383 return token;
3fef5b2b
NB
3384 }
3385
041c3194
ZW
3386 ASSIGN_FLAGS_AND_POS (result, token);
3387 return result;
3388}
3389#undef DSC
3390
61d0346d 3391/* Allocate pfile->input_buffer, and initialize _cpp_trigraph_map[]
041c3194
ZW
3392 if it hasn't happened already. */
3393
3394void
3395_cpp_init_input_buffer (pfile)
3396 cpp_reader *pfile;
3397{
417f3e3a
ZW
3398 cpp_context *base;
3399
417f3e3a
ZW
3400 _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3401 pfile->no_expand_level = UINT_MAX;
041c3194 3402 pfile->context_cap = 20;
041c3194 3403 pfile->cur_context = 0;
041c3194 3404
417f3e3a
ZW
3405 pfile->contexts = (cpp_context *)
3406 xmalloc (pfile->context_cap * sizeof (cpp_context));
041c3194 3407
417f3e3a
ZW
3408 /* Clear the base context. */
3409 base = &pfile->contexts[0];
3410 base->u.list = &pfile->token_list;
3411 base->posn = 0;
3412 base->count = 0;
3413 base->args = 0;
3414 base->level = 0;
3415 base->flags = 0;
3416 base->pushed_token = 0;
041c3194
ZW
3417}
3418
3419/* Moves to the end of the directive line, popping contexts as
3420 necessary. */
3421void
3422_cpp_skip_rest_of_line (pfile)
3423 cpp_reader *pfile;
3424{
417f3e3a
ZW
3425 /* Discard all stacked contexts. */
3426 int i;
3427 for (i = pfile->cur_context; i > 0; i--)
3428 if (pfile->contexts[i].args)
3429 free_macro_args (pfile->contexts[i].args);
3430
3431 if (pfile->no_expand_level <= pfile->cur_context)
3432 pfile->no_expand_level = 0;
3433 pfile->cur_context = 0;
041c3194 3434
417f3e3a
ZW
3435 /* Clear the base context, and clear the directive pointer so that
3436 get_raw_token will advance to the next line. */
3437 pfile->contexts[0].count = 0;
3438 pfile->contexts[0].posn = 0;
041c3194 3439 pfile->token_list.directive = 0;
c5a04734
ZW
3440}
3441
041c3194
ZW
3442/* Directive handler wrapper used by the command line option
3443 processor. */
3444void
2c8f0515 3445_cpp_run_directive (pfile, dir, buf, count, name)
041c3194
ZW
3446 cpp_reader *pfile;
3447 const struct directive *dir;
3448 const char *buf;
3449 size_t count;
2c8f0515 3450 const char *name;
041c3194
ZW
3451{
3452 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3453 {
3454 unsigned int prev_lvl = 0;
417f3e3a 3455
2c8f0515
ZW
3456 if (name)
3457 CPP_BUFFER (pfile)->nominal_fname = name;
3458 else
3459 CPP_BUFFER (pfile)->nominal_fname = _("<command line>");
3460 CPP_BUFFER (pfile)->lineno = (unsigned int)-1;
3461
417f3e3a
ZW
3462 /* Scan the line now, else prevent_macro_expansion won't work. */
3463 lex_next (pfile, 1);
041c3194
ZW
3464 if (! (dir->flags & EXPAND))
3465 prev_lvl = prevent_macro_expansion (pfile);
3466
3467 (void) (*dir->handler) (pfile);
3468
3469 if (! (dir->flags & EXPAND))
3470 restore_macro_expansion (pfile, prev_lvl);
3471
3472 _cpp_skip_rest_of_line (pfile);
3473 cpp_pop_buffer (pfile);
3474 }
3475}
This page took 0.622689 seconds and 5 git commands to generate.