]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
cpphash.h: (TOKEN_SPELL) Pulled from cpplex.c.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
041c3194
ZW
23/*
24
25Cleanups to do:-
26
041c3194
ZW
27o -dM and with _cpp_dump_list: too many \n output.
28o Put a printer object in cpp_reader?
29o Check line numbers assigned to all errors.
30o Replace strncmp with memcmp almost everywhere.
31o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
041c3194
ZW
32o Convert do_ functions to return void. Kaveh thinks it's OK; and said he'll
33 give it a run when we've got some code.
041c3194
ZW
34o Distinguish integers, floats, and 'other' pp-numbers.
35o Store ints and char constants as binary values.
36o New command-line assertion syntax.
041c3194
ZW
37o Work towards functions in cpperror.c taking a message level parameter.
38 If we do this, merge the common code of do_warning and do_error.
39o Comment all functions, and describe macro expansion algorithm.
40o Move as much out of header files as possible.
41o Remove single quote pairs `', and some '', from diagnostics.
42o Correct pastability test for CPP_NAME and CPP_NUMBER.
43
44*/
45
45b966db
ZW
46#include "config.h"
47#include "system.h"
48#include "intl.h"
49#include "cpplib.h"
50#include "cpphash.h"
041c3194 51#include "symcat.h"
45b966db 52
041c3194
ZW
53#define auto_expand_name_space(list) \
54 _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
f2d5f0cc
ZW
55static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
56 size_t, FILE *));
041c3194 57static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
1368ee70 58 unsigned int));
041c3194 59static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
9e62c811 60 unsigned int));
f2d5f0cc 61
041c3194
ZW
62static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
63static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
b8f41010
NB
64 unsigned char *));
65static const unsigned char *backslash_start PARAMS ((cpp_reader *,
66 const unsigned char *));
041c3194
ZW
67static int skip_block_comment PARAMS ((cpp_reader *));
68static int skip_line_comment PARAMS ((cpp_reader *));
b8f41010 69static void skip_whitespace PARAMS ((cpp_reader *, int));
bfb9dc7f
ZW
70static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
71 const U_CHAR *, const U_CHAR *));
72static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
041c3194
ZW
73static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
74 unsigned int));
b8f41010 75static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
f624ffa7
NB
76static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
77 const unsigned char *,
ad265aa4 78 unsigned int, unsigned int));
041c3194
ZW
79static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
80static int lex_next PARAMS ((cpp_reader *, int));
81static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
82 const cpp_token *));
b8f41010 83
041c3194
ZW
84static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
85static void expand_context_stack PARAMS ((cpp_reader *));
d1d9a6bd 86static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
041c3194
ZW
87 unsigned char *));
88static void output_token PARAMS ((cpp_reader *, const cpp_token *,
89 const cpp_token *));
b8f41010
NB
90typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
91 cpp_token *));
041c3194
ZW
92static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
93 unsigned int));
94static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
95static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
96 const cpp_token *));
97static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
98static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
99 const cpp_token *));
100static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
101 const cpp_token *, int *));
102static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
103static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
104static cpp_token *get_temp_token PARAMS ((cpp_reader *));
105static void release_temp_tokens PARAMS ((cpp_reader *));
106static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
107static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
b8f41010 108
bfb9dc7f
ZW
109#define INIT_TOKEN_STR(list, token) \
110 do {(token)->val.str.len = 0; \
111 (token)->val.str.text = (list)->namebuf + (list)->name_used; \
d1d9a6bd 112 } while (0)
b8f41010 113
041c3194
ZW
114#define VALID_SIGN(c, prevc) \
115 (((c) == '+' || (c) == '-') && \
116 ((prevc) == 'e' || (prevc) == 'E' \
117 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
118
b8f41010
NB
/* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
   character, if any, is in buffer.

   C is the newline character that has just been consumed and CUR
   points at the character following it.  If that character is the
   other member of the CR/LF pair it is consumed as well.  The current
   buffer's line number is then incremented, its line_base is set to
   CUR, and the column adjustment is reset.  A `pfile' must be in
   scope where the macro is used.

   C is parenthesized in the expansion so that passing an expression
   cannot alter the meaning of the comparison.  */

#define handle_newline(cur, limit, c) \
  do { \
    if ((cur) < (limit) && *(cur) == '\r' + '\n' - (c)) \
      (cur)++; \
    pfile->buffer->lineno++; \
    pfile->buffer->line_base = (cur); \
    pfile->col_adjust = 0; \
  } while (0)
b8f41010 130
041c3194 131#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
b8f41010
NB
132#define PREV_TOKEN_TYPE (cur_token[-1].type)
133
f617b8e2
NB
/* Token-stack helpers.  All of them operate on a `cur_token' pointer
   that must be in scope at the point of use.

   PUSH_TOKEN stores TTYPE in the current slot and advances.
   REVISE_TOKEN overwrites the type of the most recently pushed token.
   BACKUP_TOKEN steps back one slot and stores TTYPE there.
   BACKUP_DIGRAPH does the same and also sets the DIGRAPH flag.

   The argument and each expansion are parenthesized so the macros
   behave as single expressions wherever they are used.  */
#define PUSH_TOKEN(ttype) (cur_token++->type = (ttype))
#define REVISE_TOKEN(ttype) (cur_token[-1].type = (ttype))
#define BACKUP_TOKEN(ttype) ((--cur_token)->type = (ttype))
#define BACKUP_DIGRAPH(ttype) do { \
  BACKUP_TOKEN (ttype); cur_token->flags |= DIGRAPH;} while (0)
139
140/* An upper bound on the number of bytes needed to spell a token,
141 including preceding whitespace. */
bfb9dc7f
ZW
142#define TOKEN_LEN(token) (5 + (TOKEN_SPELL(token) == SPELL_STRING \
143 ? (token)->val.str.len \
144 : (TOKEN_SPELL(token) == SPELL_IDENT \
145 ? (token)->val.node->length \
146 : 0)))
f617b8e2 147
f617b8e2 148#define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
5d7ee2fa
NB
149#define I(e, s) {SPELL_IDENT, s},
150#define S(e, s) {SPELL_STRING, s},
b8f41010
NB
151#define C(e, s) {SPELL_CHAR, s},
152#define N(e, s) {SPELL_NONE, s},
b8f41010 153
041c3194
ZW
154const struct token_spelling
155token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
156
157#undef T
158#undef I
159#undef S
160#undef C
161#undef N
162
163/* For debugging: the internal names of the tokens. */
b449f23a
KG
164#define T(e, s) U STRINGX(e),
165#define I(e, s) U STRINGX(e),
166#define S(e, s) U STRINGX(e),
167#define C(e, s) U STRINGX(e),
168#define N(e, s) U STRINGX(e),
041c3194 169
cf00a885 170const U_CHAR *const token_names[N_TTYPES] = { TTYPE_TABLE };
b8f41010
NB
171
172#undef T
5d7ee2fa
NB
173#undef I
174#undef S
b8f41010
NB
175#undef C
176#undef N
b8f41010 177
041c3194
ZW
178/* The following table is used by trigraph_ok/trigraph_replace. If we
179 have designated initializers, it can be constant data; otherwise,
180 it is set up at runtime by _cpp_init_input_buffer. */
181
182#if (GCC_VERSION >= 2007)
183#define init_trigraph_map() /* nothing */
184#define TRIGRAPH_MAP \
185__extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
186#define END };
187#define s(p, v) [p] = v,
188#else
189#define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
190 static void init_trigraph_map PARAMS ((void)) { \
191 unsigned char *x = trigraph_map;
192#define END }
193#define s(p, v) x[p] = v;
194#endif
195
196TRIGRAPH_MAP
197 s('=', '#') s(')', ']') s('!', '|')
198 s('(', '[') s('\'', '^') s('>', '}')
199 s('/', '\\') s('<', '{') s('-', '~')
200END
201
202#undef TRIGRAPH_MAP
203#undef END
204#undef s
205
45b966db
ZW
206/* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
207
208void
209_cpp_grow_token_buffer (pfile, n)
210 cpp_reader *pfile;
211 long n;
212{
213 long old_written = CPP_WRITTEN (pfile);
214 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
215 pfile->token_buffer = (U_CHAR *)
216 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
217 CPP_SET_WRITTEN (pfile, old_written);
218}
219
f2d5f0cc
ZW
220/* Deal with the annoying semantics of fwrite. */
221static void
222safe_fwrite (pfile, buf, len, fp)
223 cpp_reader *pfile;
224 const U_CHAR *buf;
225 size_t len;
226 FILE *fp;
227{
228 size_t count;
45b966db 229
f2d5f0cc
ZW
230 while (len)
231 {
232 count = fwrite (buf, 1, len, fp);
233 if (count == 0)
234 goto error;
235 len -= count;
236 buf += count;
237 }
238 return;
239
240 error:
241 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
242}
243
244/* Notify the compiler proper that the current line number has jumped,
245 or the current file name has changed. */
246
247static void
1368ee70 248output_line_command (pfile, print, line)
45b966db 249 cpp_reader *pfile;
f2d5f0cc 250 cpp_printer *print;
1368ee70 251 unsigned int line;
45b966db 252{
041c3194 253 cpp_buffer *ip = CPP_BUFFER (pfile);
f2d5f0cc
ZW
254 enum { same = 0, enter, leave, rname } change;
255 static const char * const codes[] = { "", " 1", " 2", "" };
256
041c3194
ZW
257 if (line == 0)
258 return;
259
260 /* End the previous line of text. */
261 if (pfile->need_newline)
262 putc ('\n', print->outf);
263 pfile->need_newline = 0;
264
f2d5f0cc
ZW
265 if (CPP_OPTION (pfile, no_line_commands))
266 return;
267
041c3194
ZW
268 /* If ip is null, we've been called from cpp_finish, and they just
269 needed the final flush and trailing newline. */
270 if (!ip)
271 return;
272
fb753f88 273 if (pfile->include_depth == print->last_id)
54bef41d 274 {
fb753f88
JJ
275 /* Determine whether the current filename has changed, and if so,
276 how. 'nominal_fname' values are unique, so they can be compared
277 by comparing pointers. */
278 if (ip->nominal_fname == print->last_fname)
279 change = same;
280 else
0e500c78 281 change = rname;
fb753f88
JJ
282 }
283 else
284 {
285 if (pfile->include_depth > print->last_id)
286 change = enter;
f2d5f0cc 287 else
fb753f88
JJ
288 change = leave;
289 print->last_id = pfile->include_depth;
45b966db 290 }
fb753f88
JJ
291 print->last_fname = ip->nominal_fname;
292
f2d5f0cc
ZW
293 /* If the current file has not changed, we can output a few newlines
294 instead if we want to increase the line number by a small amount.
295 We cannot do this if print->lineno is zero, because that means we
296 haven't output any line commands yet. (The very first line
297 command output is a `same_file' command.) */
041c3194 298 if (change == same && print->lineno > 0
f2d5f0cc 299 && line >= print->lineno && line < print->lineno + 8)
45b966db 300 {
f2d5f0cc 301 while (line > print->lineno)
45b966db 302 {
f2d5f0cc
ZW
303 putc ('\n', print->outf);
304 print->lineno++;
45b966db 305 }
f2d5f0cc 306 return;
45b966db 307 }
f2d5f0cc
ZW
308
309#ifndef NO_IMPLICIT_EXTERN_C
310 if (CPP_OPTION (pfile, cplusplus))
311 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
312 codes[change],
c31a6508
ZW
313 ip->inc->sysp ? " 3" : "",
314 (ip->inc->sysp == 2) ? " 4" : "");
f2d5f0cc
ZW
315 else
316#endif
317 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
318 codes[change],
c31a6508 319 ip->inc->sysp ? " 3" : "");
f2d5f0cc
ZW
320 print->lineno = line;
321}
322
323/* Write the contents of the token_buffer to the output stream, and
324 clear the token_buffer. Also handles generating line commands and
325 keeping track of file transitions. */
326
327void
041c3194 328cpp_output_tokens (pfile, print, line)
f2d5f0cc
ZW
329 cpp_reader *pfile;
330 cpp_printer *print;
041c3194 331 unsigned int line;
f2d5f0cc 332{
f6fab919
ZW
333 if (CPP_WRITTEN (pfile) - print->written)
334 {
f6fab919
ZW
335 safe_fwrite (pfile, pfile->token_buffer,
336 CPP_WRITTEN (pfile) - print->written, print->outf);
041c3194
ZW
337 pfile->need_newline = 1;
338 if (print->lineno)
339 print->lineno++;
45b966db 340
041c3194 341 CPP_SET_WRITTEN (pfile, print->written);
f2d5f0cc 342 }
041c3194 343 output_line_command (pfile, print, line);
45b966db
ZW
344}
345
c56c2073 346/* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
f2d5f0cc
ZW
347
348void
349cpp_scan_buffer_nooutput (pfile)
350 cpp_reader *pfile;
351{
f2d5f0cc 352 unsigned int old_written = CPP_WRITTEN (pfile);
041c3194
ZW
353 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
354
f2d5f0cc
ZW
355 for (;;)
356 {
041c3194
ZW
357 /* In no-output mode, we can ignore everything but directives. */
358 const cpp_token *token = cpp_get_token (pfile);
359 if (token->type == CPP_EOF)
360 {
361 cpp_pop_buffer (pfile);
362 if (CPP_BUFFER (pfile) == stop)
363 break;
364 }
365 _cpp_skip_rest_of_line (pfile);
f2d5f0cc
ZW
366 }
367 CPP_SET_WRITTEN (pfile, old_written);
368}
369
c56c2073 370/* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
f2d5f0cc
ZW
371
372void
373cpp_scan_buffer (pfile, print)
374 cpp_reader *pfile;
375 cpp_printer *print;
376{
c56c2073 377 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
041c3194 378 const cpp_token *token, *prev = 0;
f2d5f0cc
ZW
379
380 for (;;)
381 {
382 token = cpp_get_token (pfile);
041c3194 383 if (token->type == CPP_EOF)
f2d5f0cc 384 {
041c3194
ZW
385 cpp_pop_buffer (pfile);
386 if (CPP_BUFFER (pfile) == stop)
f2d5f0cc 387 return;
041c3194
ZW
388 cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
389 prev = 0;
390 continue;
391 }
392
393 if (token->flags & BOL)
394 {
395 cpp_output_tokens (pfile, print, pfile->token_list.line);
396 prev = 0;
f2d5f0cc 397 }
041c3194
ZW
398
399 output_token (pfile, token, prev);
400 prev = token;
f2d5f0cc
ZW
401 }
402}
403
041c3194
ZW
404/* Helper routine used by parse_include, which can't see spell_token.
405 Reinterpret the current line as an h-char-sequence (< ... >); we are
406 looking at the first token after the <. */
407const cpp_token *
408_cpp_glue_header_name (pfile)
45b966db
ZW
409 cpp_reader *pfile;
410{
041c3194
ZW
411 unsigned int written = CPP_WRITTEN (pfile);
412 const cpp_token *t;
413 cpp_token *hdr;
414 U_CHAR *buf;
415 size_t len;
416
417 for (;;)
418 {
417f3e3a 419 t = _cpp_get_token (pfile);
041c3194
ZW
420 if (t->type == CPP_GREATER || t->type == CPP_EOF)
421 break;
422
423 CPP_RESERVE (pfile, TOKEN_LEN (t));
424 if (t->flags & PREV_WHITE)
425 CPP_PUTC_Q (pfile, ' ');
426 pfile->limit = spell_token (pfile, t, pfile->limit);
427 }
428
429 if (t->type == CPP_EOF)
430 cpp_error (pfile, "missing terminating > character");
45b966db 431
041c3194
ZW
432 len = CPP_WRITTEN (pfile) - written;
433 buf = xmalloc (len);
434 memcpy (buf, pfile->token_buffer + written, len);
435 CPP_SET_WRITTEN (pfile, written);
436
437 hdr = get_temp_token (pfile);
438 hdr->type = CPP_HEADER_NAME;
439 hdr->flags = 0;
bfb9dc7f
ZW
440 hdr->val.str.text = buf;
441 hdr->val.str.len = len;
041c3194 442 return hdr;
45b966db
ZW
443}
444
1368ee70
ZW
445/* Token-buffer helper functions. */
446
d1d9a6bd
NB
447/* Expand a token list's string space. It is *vital* that
448 list->tokens_used is correct, to get pointer fix-up right. */
041c3194
ZW
449void
450_cpp_expand_name_space (list, len)
1368ee70 451 cpp_toklist *list;
c5a04734
ZW
452 unsigned int len;
453{
f617b8e2 454 const U_CHAR *old_namebuf;
f617b8e2
NB
455
456 old_namebuf = list->namebuf;
c5a04734
ZW
457 list->name_cap += len;
458 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
f617b8e2
NB
459
460 /* Fix up token text pointers. */
79f50f2a 461 if (list->namebuf != old_namebuf)
f617b8e2
NB
462 {
463 unsigned int i;
464
465 for (i = 0; i < list->tokens_used; i++)
bfb9dc7f
ZW
466 if (token_spellings[list->tokens[i].type].type == SPELL_STRING)
467 list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
f617b8e2 468 }
1368ee70
ZW
469}
470
041c3194
ZW
471/* If there is not enough room for LEN more characters, expand the
472 list by just enough to have room for LEN characters. */
473void
474_cpp_reserve_name_space (list, len)
475 cpp_toklist *list;
476 unsigned int len;
477{
478 unsigned int room = list->name_cap - list->name_used;
479
480 if (room < len)
481 _cpp_expand_name_space (list, len - room);
482}
483
1368ee70 484/* Expand the number of tokens in a list. */
d1d9a6bd
NB
485void
486_cpp_expand_token_space (list, count)
1368ee70 487 cpp_toklist *list;
d1d9a6bd 488 unsigned int count;
1368ee70 489{
d1d9a6bd
NB
490 unsigned int n;
491
492 list->tokens_cap += count;
493 n = list->tokens_cap;
15dad1d9 494 if (list->flags & LIST_OFFSET)
d1d9a6bd 495 list->tokens--, n++;
1368ee70 496 list->tokens = (cpp_token *)
d1d9a6bd 497 xrealloc (list->tokens, n * sizeof (cpp_token));
15dad1d9
ZW
498 if (list->flags & LIST_OFFSET)
499 list->tokens++; /* Skip the dummy. */
1368ee70
ZW
500}
501
d1d9a6bd
NB
502/* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
503 an extra token in front of the token list, as this allows the lexer
504 to always peek at the previous token without worrying about
505 underflowing the list, and some initial space. Otherwise, no
506 token- or name-space is allocated, and there is no dummy token. */
15dad1d9 507void
d1d9a6bd 508_cpp_init_toklist (list, flags)
1368ee70 509 cpp_toklist *list;
d1d9a6bd 510 int flags;
1368ee70 511{
d1d9a6bd
NB
512 if (flags == NO_DUMMY_TOKEN)
513 {
514 list->tokens_cap = 0;
041c3194 515 list->tokens = 0;
d1d9a6bd 516 list->name_cap = 0;
041c3194 517 list->namebuf = 0;
d1d9a6bd
NB
518 list->flags = 0;
519 }
520 else
521 {
522 /* Initialize token space. Put a dummy token before the start
523 that will fail matches. */
524 list->tokens_cap = 256; /* 4K's worth. */
525 list->tokens = (cpp_token *)
526 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
527 list->tokens[0].type = CPP_EOF;
528 list->tokens++;
529
530 /* Initialize name space. */
531 list->name_cap = 1024;
041c3194 532 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
d1d9a6bd
NB
533 list->flags = LIST_OFFSET;
534 }
15dad1d9 535
15dad1d9
ZW
536 _cpp_clear_toklist (list);
537}
1368ee70 538
15dad1d9
ZW
539/* Clear a token list. */
540void
541_cpp_clear_toklist (list)
542 cpp_toklist *list;
543{
c5a04734
ZW
544 list->tokens_used = 0;
545 list->name_used = 0;
041c3194
ZW
546 list->directive = 0;
547 list->paramc = 0;
548 list->params_len = 0;
15dad1d9
ZW
549 list->flags &= LIST_OFFSET; /* clear all but that one */
550}
551
552/* Free a token list. Does not free the list itself, which may be
553 embedded in a larger structure. */
554void
555_cpp_free_toklist (list)
041c3194 556 const cpp_toklist *list;
15dad1d9 557{
15dad1d9
ZW
558 if (list->flags & LIST_OFFSET)
559 free (list->tokens - 1); /* Backup over dummy token. */
560 else
561 free (list->tokens);
562 free (list->namebuf);
1368ee70
ZW
563}
564
15dad1d9
ZW
565/* Compare two tokens. */
566int
567_cpp_equiv_tokens (a, b)
568 const cpp_token *a, *b;
569{
041c3194
ZW
570 if (a->type == b->type && a->flags == b->flags)
571 switch (token_spellings[a->type].type)
572 {
573 default: /* Keep compiler happy. */
574 case SPELL_OPERATOR:
575 return 1;
576 case SPELL_CHAR:
577 case SPELL_NONE:
578 return a->val.aux == b->val.aux; /* arg_no or character. */
579 case SPELL_IDENT:
bfb9dc7f 580 return a->val.node == b->val.node;
041c3194 581 case SPELL_STRING:
bfb9dc7f
ZW
582 return (a->val.str.len == b->val.str.len
583 && !memcmp (a->val.str.text, b->val.str.text,
584 a->val.str.len));
041c3194 585 }
15dad1d9 586
041c3194 587 return 0;
15dad1d9
ZW
588}
589
590/* Compare two token lists. */
591int
592_cpp_equiv_toklists (a, b)
593 const cpp_toklist *a, *b;
594{
595 unsigned int i;
596
041c3194
ZW
597 if (a->tokens_used != b->tokens_used
598 || a->flags != b->flags
599 || a->paramc != b->paramc)
15dad1d9
ZW
600 return 0;
601
602 for (i = 0; i < a->tokens_used; i++)
603 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
604 return 0;
605 return 1;
606}
607
041c3194 608/* Utility routine:
9e62c811 609
bfb9dc7f
ZW
610 Compares, the token TOKEN to the NUL-terminated string STRING.
611 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
15dad1d9 612
041c3194 613int
bfb9dc7f
ZW
614cpp_ideq (token, string)
615 const cpp_token *token;
041c3194
ZW
616 const char *string;
617{
bfb9dc7f 618 if (token->type != CPP_NAME)
041c3194 619 return 0;
bfb9dc7f
ZW
620
621 return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
15dad1d9 622}
1368ee70 623
041c3194 624/* Lexing algorithm.
45b966db 625
041c3194
ZW
626 The original lexer in cpplib was made up of two passes: a first pass
627 that replaced trigraphs and deleted escaped newlines, and a second
628 pass that tokenized the result of the first pass. Tokenisation was
629 performed by peeking at the next character in the input stream. For
630 example, if the input stream contained "!=", the handler for the !
631 character would peek at the next character, and if it were a '='
632 would skip over it, and return a "!=" token, otherwise it would
633 return just the "!" token.
61474454 634
041c3194
ZW
635 To implement a single-pass lexer, this peeking ahead is unworkable.
636 An arbitrary number of escaped newlines, and trigraphs (in particular
637 ??/ which translates to the escape \), could separate the '!' and '='
638 in the input stream, yet the next token is still a "!=".
61474454 639
041c3194
ZW
640 Suppose instead that we lex by one logical line at a time, producing
641 a token list or stack for each logical line, and when seeing the '!'
642 push a CPP_NOT token on the list. Then if the '!' is part of a
643 longer token ("!=") we know we must see the remainder of the token by
644 the time we reach the end of the logical line. Thus we can have the
645 '=' handler look at the previous token (at the end of the list / top
646 of the stack) and see if it is a "!" token, and if so, instead of
647 pushing a "=" token revise the existing token to be a "!=" token.
61474454 648
041c3194
ZW
649 This works in the presence of escaped newlines, because the '\' would
650 have been pushed on the top of the stack as a CPP_BACKSLASH. The
651 newline ('\n' or '\r') handler looks at the token at the top of the
652 stack to see if it is a CPP_BACKSLASH, and if so discards both.
653 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
654 the '=' handler would never see any intervening escaped newlines.
45b966db 655
041c3194
ZW
656 To make trigraphs work in this context, as in precedence trigraphs
657 are highest and converted before anything else, the '?' handler does
658 lookahead to see if it is a trigraph, and if so skips the trigraph
659 and pushes the token it represents onto the top of the stack. This
660 also works in the particular case of a CPP_BACKSLASH trigraph.
45b966db 661
041c3194
ZW
662 To the preprocessor, whitespace is only significant to the point of
663 knowing whether whitespace precedes a particular token. For example,
664 the '=' handler needs to know whether there was whitespace between it
665 and a "!" token on the top of the stack, to make the token conversion
666 decision correctly. So each token has a PREV_WHITE flag to
667 indicate this - the standard permits consecutive whitespace to be
668 regarded as a single space. The compiler front ends are not
669 interested in whitespace at all; they just require a token stream.
670 Another place where whitespace is significant to the preprocessor is
671 a #define statement - if there is whitespace between the macro name
672 and an initial "(" token the macro is "object-like", otherwise it is
673 a function-like macro that takes arguments.
674
675 However, all is not rosy. Parsing of identifiers, numbers, comments
676 and strings becomes trickier because of the possibility of raw
677 trigraphs and escaped newlines in the input stream.
678
679 The trigraphs are three consecutive characters beginning with two
680 question marks. A question mark is not valid as part of a number or
681 identifier, so parsing of a number or identifier terminates normally
682 upon reaching it, returning to the mainloop which handles the
683 trigraph just like it would in any other position. Similarly for the
684 backslash of a backslash-newline combination. So we just need the
685 escaped-newline dropper in the mainloop to check if the token on the
686 top of the stack after dropping the escaped newline is a number or
687 identifier, and if so to continue the processing it as if nothing had
688 happened.
689
690 For strings, we replace trigraphs whenever we reach a quote or
691 newline, because there might be a backslash trigraph escaping them.
692 We need to be careful that we start trigraph replacing from where we
693 left off previously, because it is possible for a first scan to leave
694 "fake" trigraphs that a second scan would pick up as real (e.g. the
695 sequence "????/\n=" would find a fake ??= trigraph after removing the
696 escaped newline.)
697
698 For line comments, on reaching a newline we scan the previous
699 character(s) to see if it escaped, and continue if it is. Block
700 comments ignore everything and just focus on finding the comment
701 termination mark. The only difficult thing, and it is surprisingly
702 tricky, is checking if an asterisk precedes the final slash since
703 they could be separated by escaped newlines. If the preprocessor is
704 invoked with the output comments option, we don't bother removing
705 escaped newlines and replacing trigraphs for output.
706
707 Finally, numbers can begin with a period, which is pushed initially
708 as a CPP_DOT token in its own right. The digit handler checks if the
709 previous token was a CPP_DOT not separated by whitespace, and if so
710 pops it off the stack and pushes a period into the number's buffer
711 before calling the number parser.
45b966db 712
041c3194
ZW
713*/
714
715static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
716 U":>", U"<%", U"%>"};
45b966db 717
041c3194
ZW
718/* Call when a trigraph is encountered. It warns if necessary, and
719 returns true if the trigraph should be honoured. END is the third
720 character of a trigraph in the input stream. */
45b966db 721static int
041c3194 722trigraph_ok (pfile, end)
45b966db 723 cpp_reader *pfile;
041c3194 724 const unsigned char *end;
45b966db 725{
041c3194
ZW
726 int accept = CPP_OPTION (pfile, trigraphs);
727
728 if (CPP_OPTION (pfile, warn_trigraphs))
45b966db 729 {
041c3194
ZW
730 unsigned int col = end - 1 - pfile->buffer->line_base;
731 if (accept)
732 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
733 "trigraph ??%c converted to %c",
734 (int) *end, (int) trigraph_map[*end]);
45b966db 735 else
041c3194
ZW
736 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
737 "trigraph ??%c ignored", (int) *end);
45b966db 738 }
041c3194 739 return accept;
45b966db
ZW
740}
741
041c3194
ZW
742/* Scan a string for trigraphs, warning or replacing them inline as
743 appropriate. When parsing a string, we must call this routine
744 before processing a newline character (if trigraphs are enabled),
745 since the newline might be escaped by a preceding backslash
746 trigraph sequence. Returns a pointer to the end of the name after
747 replacement. */
748
749static unsigned char *
750trigraph_replace (pfile, src, limit)
45b966db 751 cpp_reader *pfile;
041c3194
ZW
752 unsigned char *src;
753 unsigned char *limit;
45b966db 754{
041c3194
ZW
755 unsigned char *dest;
756
757 /* Starting with src[1], find two consecutive '?'. The case of no
758 trigraphs is streamlined. */
759
043afb2a 760 for (src++; src + 1 < limit; src += 2)
041c3194
ZW
761 {
762 if (src[0] != '?')
763 continue;
764
765 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
766 if (src[-1] == '?')
767 src--;
768 else if (src + 2 == limit || src[1] != '?')
769 continue;
45b966db 770
041c3194
ZW
771 /* Check if it really is a trigraph. */
772 if (trigraph_map[src[2]] == 0)
773 continue;
45b966db 774
041c3194
ZW
775 dest = src;
776 goto trigraph_found;
777 }
778 return limit;
45b966db 779
041c3194
ZW
780 /* Now we have a trigraph, we need to scan the remaining buffer, and
781 copy-shifting its contents left if replacement is enabled. */
782 for (; src + 2 < limit; dest++, src++)
783 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
784 {
785 trigraph_found:
786 src += 2;
787 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
788 *dest = trigraph_map[*src];
789 }
790
791 /* Copy remaining (at most 2) characters. */
792 while (src < limit)
793 *dest++ = *src++;
794 return dest;
45b966db
ZW
795}
796
041c3194
ZW
797/* If CUR is a backslash or the end of a trigraphed backslash, return
798 a pointer to its beginning, otherwise NULL. We don't read beyond
799 the buffer start, because there is the start of the comment in the
800 buffer. */
801static const unsigned char *
802backslash_start (pfile, cur)
64aaf407 803 cpp_reader *pfile;
041c3194 804 const unsigned char *cur;
64aaf407 805{
041c3194
ZW
806 if (cur[0] == '\\')
807 return cur;
808 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
809 && trigraph_ok (pfile, cur))
810 return cur - 2;
811 return 0;
64aaf407
NB
812}
813
041c3194
ZW
814/* Skip a C-style block comment. This is probably the trickiest
815 handler. We find the end of the comment by seeing if an asterisk
816 is before every '/' we encounter. The nasty complication is that a
817 previous asterisk may be separated by one or more escaped newlines.
818 Returns non-zero if comment terminated by EOF, zero otherwise. */
819static int
820skip_block_comment (pfile)
45b966db
ZW
821 cpp_reader *pfile;
822{
041c3194
ZW
823 cpp_buffer *buffer = pfile->buffer;
824 const unsigned char *char_after_star = 0;
825 register const unsigned char *cur = buffer->cur;
826 int seen_eof = 0;
827
828 /* Inner loop would think the comment has ended if the first comment
829 character is a '/'. Avoid this and keep the inner loop clean by
830 skipping such a character. */
831 if (cur < buffer->rlimit && cur[0] == '/')
832 cur++;
64aaf407 833
041c3194 834 for (; cur < buffer->rlimit; )
45b966db 835 {
041c3194
ZW
836 unsigned char c = *cur++;
837
838 /* People like decorating comments with '*', so check for
839 '/' instead for efficiency. */
840 if (c == '/')
45b966db 841 {
041c3194
ZW
842 if (cur[-2] == '*' || cur - 1 == char_after_star)
843 goto out;
844
845 /* Warn about potential nested comments, but not when
846 the final character inside the comment is a '/'.
847 Don't bother to get it right across escaped newlines. */
848 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
849 && cur[0] == '*' && cur[1] != '/')
45b966db 850 {
041c3194
ZW
851 buffer->cur = cur;
852 cpp_warning (pfile, "'/*' within comment");
45b966db 853 }
45b966db 854 }
91fcd158 855 else if (is_vspace (c))
45b966db 856 {
041c3194
ZW
857 const unsigned char* bslash = backslash_start (pfile, cur - 2);
858
859 handle_newline (cur, buffer->rlimit, c);
860 /* Work correctly if there is an asterisk before an
861 arbirtrarily long sequence of escaped newlines. */
862 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
863 char_after_star = cur;
864 else
865 char_after_star = 0;
45b966db 866 }
45b966db 867 }
041c3194
ZW
868 seen_eof = 1;
869
64aaf407 870 out:
041c3194
ZW
871 buffer->cur = cur;
872 return seen_eof;
45b966db
ZW
873}
874
041c3194
ZW
875/* Skip a C++ or Chill line comment. Handles escaped newlines.
876 Returns non-zero if a multiline comment. */
877static int
878skip_line_comment (pfile)
45b966db
ZW
879 cpp_reader *pfile;
880{
041c3194
ZW
881 cpp_buffer *buffer = pfile->buffer;
882 register const unsigned char *cur = buffer->cur;
883 int multiline = 0;
884
885 for (; cur < buffer->rlimit; )
886 {
887 unsigned char c = *cur++;
888
91fcd158 889 if (is_vspace (c))
45b966db 890 {
041c3194
ZW
891 /* Check for a (trigaph?) backslash escaping the newline. */
892 if (!backslash_start (pfile, cur - 2))
893 goto out;
894 multiline = 1;
895 handle_newline (cur, buffer->rlimit, c);
896 }
897 }
898 cur++;
45b966db 899
041c3194
ZW
900 out:
901 buffer->cur = cur - 1; /* Leave newline for caller. */
902 return multiline;
903}
45b966db 904
041c3194
ZW
905/* Skips whitespace, stopping at next non-whitespace character.
906 Adjusts pfile->col_adjust to account for tabs. This enables tokens
907 to be assigned the correct column. */
908static void
909skip_whitespace (pfile, in_directive)
910 cpp_reader *pfile;
911 int in_directive;
912{
913 cpp_buffer *buffer = pfile->buffer;
91fcd158 914 unsigned short warned = 0;
45b966db 915
91fcd158
NB
916 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
917 while (buffer->cur < buffer->rlimit)
041c3194 918 {
91fcd158 919 unsigned char c = *buffer->cur;
45b966db 920
91fcd158
NB
921 if (!is_nvspace (c))
922 break;
923
924 buffer->cur++;
925 /* Horizontal space always OK. */
926 if (c == ' ')
927 continue;
928 else if (c == '\t')
929 pfile->col_adjust += CPP_OPTION (pfile, tabstop) - 1
930 - (CPP_BUF_COL (buffer) - 1) % CPP_OPTION(pfile, tabstop);
931 /* Must be \f \v or \0. */
932 else if (c == '\0')
041c3194 933 {
91fcd158
NB
934 if (!warned)
935 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
936 CPP_BUF_COL (buffer),
937 "embedded null character ignored");
938 warned = 1;
45b966db 939 }
041c3194 940 else if (in_directive && CPP_PEDANTIC (pfile))
91fcd158
NB
941 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
942 CPP_BUF_COL (buffer),
943 "%s in preprocessing directive",
944 c == '\f' ? "form feed" : "vertical tab");
45b966db 945 }
041c3194 946}
45b966db 947
041c3194 948/* Parse (append) an identifier. */
417f3e3a 949static const U_CHAR *
bfb9dc7f 950parse_name (pfile, tok, cur, rlimit)
45b966db 951 cpp_reader *pfile;
bfb9dc7f
ZW
952 cpp_token *tok;
953 const U_CHAR *cur, *rlimit;
45b966db 954{
bfb9dc7f
ZW
955 const U_CHAR *name = cur;
956 unsigned int len;
041c3194 957
bfb9dc7f 958 while (cur < rlimit)
041c3194 959 {
bfb9dc7f
ZW
960 if (! is_idchar (*cur))
961 break;
e5ec2402
ZW
962 /* $ is not a legal identifier character in the standard, but is
963 commonly accepted as an extension. Don't warn about it in
964 skipped conditional blocks. */
bfb9dc7f 965 if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
041c3194 966 {
bfb9dc7f 967 CPP_BUFFER (pfile)->cur = cur;
041c3194
ZW
968 cpp_pedwarn (pfile, "'$' character in identifier");
969 }
bfb9dc7f 970 cur++;
041c3194 971 }
bfb9dc7f 972 len = cur - name;
45b966db 973
bfb9dc7f 974 if (tok->val.node)
041c3194 975 {
bfb9dc7f
ZW
976 unsigned int oldlen = tok->val.node->length;
977 U_CHAR *newname = alloca (oldlen + len);
978 memcpy (newname, tok->val.node->name, oldlen);
979 memcpy (newname + oldlen, name, len);
980 len += oldlen;
981 name = newname;
041c3194
ZW
982 }
983
bfb9dc7f
ZW
984 tok->val.node = cpp_lookup (pfile, name, len);
985 return cur;
45b966db
ZW
986}
987
041c3194 988/* Parse (append) a number. */
45b966db 989static void
041c3194 990parse_number (pfile, list, name)
45b966db 991 cpp_reader *pfile;
041c3194 992 cpp_toklist *list;
bfb9dc7f 993 cpp_string *name;
45b966db 994{
041c3194
ZW
995 const unsigned char *name_limit;
996 unsigned char *namebuf;
997 cpp_buffer *buffer = pfile->buffer;
998 register const unsigned char *cur = buffer->cur;
45b966db 999
041c3194
ZW
1000 expanded:
1001 name_limit = list->namebuf + list->name_cap;
1002 namebuf = list->namebuf + list->name_used;
45b966db 1003
041c3194
ZW
1004 for (; cur < buffer->rlimit && namebuf < name_limit; )
1005 {
1006 unsigned char c = *namebuf = *cur; /* Copy a single char. */
45b966db 1007
041c3194
ZW
1008 /* Perhaps we should accept '$' here if we accept it for
1009 identifiers. We know namebuf[-1] is safe, because for c to
1010 be a sign we must have pushed at least one character. */
1011 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1012 goto out;
45b966db 1013
041c3194
ZW
1014 namebuf++;
1015 cur++;
45b966db 1016 }
64aaf407 1017
041c3194
ZW
1018 /* Run out of name space? */
1019 if (cur < buffer->rlimit)
1020 {
1021 list->name_used = namebuf - list->namebuf;
1022 auto_expand_name_space (list);
1023 goto expanded;
1024 }
1025
64aaf407 1026 out:
041c3194
ZW
1027 buffer->cur = cur;
1028 name->len = namebuf - name->text;
1029 list->name_used = namebuf - list->namebuf;
45b966db
ZW
1030}
1031
041c3194 1032/* Places a string terminated by an unescaped TERMINATOR into a
bfb9dc7f 1033 cpp_string, which should be expandable and thus at the top of the
041c3194
ZW
1034 list's stack. Handles embedded trigraphs, if necessary, and
1035 escaped newlines.
45b966db 1036
041c3194
ZW
1037 Can be used for character constants (terminator = '\''), string
1038 constants ('"') and angled headers ('>'). Multi-line strings are
1039 allowed, except for within directives. */
45b966db 1040
041c3194
ZW
1041static void
1042parse_string (pfile, list, token, terminator)
45b966db 1043 cpp_reader *pfile;
041c3194
ZW
1044 cpp_toklist *list;
1045 cpp_token *token;
1046 unsigned int terminator;
45b966db 1047{
041c3194 1048 cpp_buffer *buffer = pfile->buffer;
bfb9dc7f 1049 cpp_string *name = &token->val.str;
041c3194
ZW
1050 register const unsigned char *cur = buffer->cur;
1051 const unsigned char *name_limit;
1052 unsigned char *namebuf;
1053 unsigned int null_count = 0;
1054 unsigned int trigraphed = list->name_used;
45b966db 1055
041c3194
ZW
1056 expanded:
1057 name_limit = list->namebuf + list->name_cap;
1058 namebuf = list->namebuf + list->name_used;
f2d5f0cc 1059
041c3194 1060 for (; cur < buffer->rlimit && namebuf < name_limit; )
45b966db 1061 {
041c3194 1062 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
45b966db 1063
041c3194
ZW
1064 if (c == '\0')
1065 null_count++;
91fcd158 1066 else if (c == terminator || is_vspace (c))
45b966db 1067 {
041c3194
ZW
1068 /* Needed for trigraph_replace and multiline string warning. */
1069 buffer->cur = cur;
5eec0563 1070
041c3194
ZW
1071 /* Scan for trigraphs before checking if backslash-escaped. */
1072 if ((CPP_OPTION (pfile, trigraphs)
1073 || CPP_OPTION (pfile, warn_trigraphs))
1074 && namebuf - (list->namebuf + trigraphed) >= 3)
45b966db 1075 {
041c3194
ZW
1076 namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1077 namebuf);
1078 /* The test above guarantees trigraphed will be positive. */
1079 trigraphed = namebuf - list->namebuf - 2;
45b966db 1080 }
45b966db 1081
041c3194 1082 namebuf--; /* Drop the newline / terminator from the name. */
91fcd158 1083 if (is_vspace (c))
45b966db 1084 {
041c3194
ZW
1085 /* Drop a backslash newline, and continue. */
1086 if (namebuf[-1] == '\\')
45b966db 1087 {
041c3194
ZW
1088 handle_newline (cur, buffer->rlimit, c);
1089 namebuf--;
1090 continue;
45b966db 1091 }
45b966db 1092
041c3194 1093 cur--;
45b966db 1094
041c3194
ZW
1095 /* In Fortran and assembly language, silently terminate
1096 strings of either variety at end of line. This is a
1097 kludge around not knowing where comments are in these
1098 languages. */
1099 if (CPP_OPTION (pfile, lang_fortran)
1100 || CPP_OPTION (pfile, lang_asm))
1101 goto out;
64aaf407 1102
041c3194
ZW
1103 /* Character constants, headers and asserts may not
1104 extend over multiple lines. In Standard C, neither
1105 may strings. We accept multiline strings as an
1106 extension. (Even in directives - otherwise, glibc's
1107 longlong.h breaks.) */
1108 if (terminator != '"')
1109 goto unterminated;
1110
1111 cur++; /* Move forwards again. */
45b966db 1112
041c3194
ZW
1113 if (pfile->multiline_string_line == 0)
1114 {
1115 pfile->multiline_string_line = token->line;
1116 pfile->multiline_string_column = token->col;
1117 if (CPP_PEDANTIC (pfile))
1118 cpp_pedwarn (pfile, "multi-line string constant");
1119 }
1120
1121 *namebuf++ = '\n';
1122 handle_newline (cur, buffer->rlimit, c);
45b966db
ZW
1123 }
1124 else
1125 {
041c3194 1126 unsigned char *temp;
45b966db 1127
041c3194
ZW
1128 /* An odd number of consecutive backslashes represents
1129 an escaped terminator. */
1130 temp = namebuf - 1;
1131 while (temp >= name->text && *temp == '\\')
1132 temp--;
45b966db 1133
041c3194
ZW
1134 if ((namebuf - temp) & 1)
1135 goto out;
1136 namebuf++;
1137 }
45b966db 1138 }
ff2b53ef 1139 }
45b966db 1140
041c3194
ZW
1141 /* Run out of name space? */
1142 if (cur < buffer->rlimit)
45b966db 1143 {
041c3194
ZW
1144 list->name_used = namebuf - list->namebuf;
1145 auto_expand_name_space (list);
1146 goto expanded;
1147 }
45b966db 1148
041c3194
ZW
1149 /* We may not have trigraph-replaced the input for this code path,
1150 but as the input is in error by being unterminated we don't
1151 bother. Prevent warnings about no newlines at EOF. */
91fcd158 1152 if (is_vspace (cur[-1]))
041c3194 1153 cur--;
45b966db 1154
041c3194
ZW
1155 unterminated:
1156 cpp_error (pfile, "missing terminating %c character", (int) terminator);
476f2869 1157
041c3194
ZW
1158 if (terminator == '\"' && pfile->multiline_string_line != list->line
1159 && pfile->multiline_string_line != 0)
1160 {
1161 cpp_error_with_line (pfile, pfile->multiline_string_line,
1162 pfile->multiline_string_column,
1163 "possible start of unterminated string literal");
1164 pfile->multiline_string_line = 0;
45b966db 1165 }
041c3194
ZW
1166
1167 out:
1168 buffer->cur = cur;
1169 name->len = namebuf - name->text;
1170 list->name_used = namebuf - list->namebuf;
45b966db 1171
041c3194
ZW
1172 if (null_count > 0)
1173 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1174 : "null character preserved"));
45b966db
ZW
1175}
1176
041c3194
ZW
1177/* The character TYPE helps us distinguish comment types: '*' = C
1178 style, '-' = Chill-style and '/' = C++ style. For code simplicity,
1179 the stored comment includes the comment start and any terminator. */
1180
1181#define COMMENT_START_LEN 2
9e62c811 1182static void
041c3194
ZW
1183save_comment (list, token, from, len, type)
1184 cpp_toklist *list;
1185 cpp_token *token;
1186 const unsigned char *from;
9e62c811 1187 unsigned int len;
041c3194 1188 unsigned int type;
9e62c811 1189{
041c3194
ZW
1190 unsigned char *buffer;
1191
1192 len += COMMENT_START_LEN;
9e62c811 1193
041c3194
ZW
1194 if (list->name_used + len > list->name_cap)
1195 _cpp_expand_name_space (list, len);
9e62c811 1196
bfb9dc7f 1197 INIT_TOKEN_STR (list, token);
041c3194 1198 token->type = CPP_COMMENT;
bfb9dc7f 1199 token->val.str.len = len;
45b966db 1200
041c3194
ZW
1201 buffer = list->namebuf + list->name_used;
1202 list->name_used += len;
45b966db 1203
041c3194
ZW
1204 /* Copy the comment. */
1205 if (type == '*')
45b966db 1206 {
041c3194
ZW
1207 *buffer++ = '/';
1208 *buffer++ = '*';
45b966db 1209 }
041c3194 1210 else
ea4a453b 1211 {
041c3194
ZW
1212 *buffer++ = type;
1213 *buffer++ = type;
ea4a453b 1214 }
041c3194 1215 memcpy (buffer, from, len - COMMENT_START_LEN);
45b966db
ZW
1216}
1217
041c3194
ZW
1218/*
1219 * The tokenizer's main loop. Returns a token list, representing a
1220 * logical line in the input file. On EOF after some tokens have
1221 * been processed, we return immediately. Then in next call, or if
1222 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1223 * token is placed in the list.
1224 *
1225 * Implementation relies almost entirely on lookback, rather than
1226 * looking forwards. This means that tokenization requires just
1227 * a single pass of the file, even in the presence of trigraphs and
1228 * escaped newlines, providing significant performance benefits.
1229 * Trigraph overhead is negligible if they are disabled, and low
1230 * even when enabled.
1231 */
1232
91fcd158 1233#define KNOWN_DIRECTIVE() (list->directive != 0)
041c3194
ZW
1234#define MIGHT_BE_DIRECTIVE() \
1235(cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
45b966db 1236
041c3194
ZW
1237static void
1238lex_line (pfile, list)
45b966db 1239 cpp_reader *pfile;
041c3194 1240 cpp_toklist *list;
45b966db 1241{
041c3194
ZW
1242 cpp_token *cur_token, *token_limit, *first;
1243 cpp_buffer *buffer = pfile->buffer;
1244 const unsigned char *cur = buffer->cur;
1245 unsigned char flags = 0;
1246 unsigned int first_token = list->tokens_used;
45b966db 1247
041c3194
ZW
1248 if (!(list->flags & LIST_OFFSET))
1249 (abort) ();
1250
1251 list->file = buffer->nominal_fname;
1252 list->line = CPP_BUF_LINE (buffer);
1253 pfile->col_adjust = 0;
1254 pfile->in_lex_line = 1;
1255 if (cur == buffer->buf)
1256 list->flags |= BEG_OF_FILE;
1257
1258 expanded:
1259 token_limit = list->tokens + list->tokens_cap;
1260 cur_token = list->tokens + list->tokens_used;
45b966db 1261
041c3194 1262 for (; cur < buffer->rlimit && cur_token < token_limit;)
45b966db 1263 {
041c3194 1264 unsigned char c;
45b966db 1265
91fcd158
NB
1266 /* Optimize non-vertical whitespace skipping; most tokens are
1267 probably separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
1268 c = *cur;
1269 if (is_nvspace (c))
45b966db 1270 {
91fcd158
NB
1271 buffer->cur = cur;
1272 skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1273 && cur_token > &list->tokens[first_token]));
041c3194 1274 cur = buffer->cur;
ff2b53ef 1275
041c3194
ZW
1276 flags = PREV_WHITE;
1277 if (cur == buffer->rlimit)
1278 break;
91fcd158 1279 c = *cur;
45b966db 1280 }
91fcd158 1281 cur++;
45b966db 1282
041c3194
ZW
1283 /* Initialize current token. CPP_EOF will not be fixed up by
1284 expand_name_space. */
1285 list->tokens_used = cur_token - list->tokens + 1;
1286 cur_token->type = CPP_EOF;
1287 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1288 cur_token->line = CPP_BUF_LINE (buffer);
1289 cur_token->flags = flags;
1290 flags = 0;
46d07497 1291
041c3194
ZW
1292 switch (c)
1293 {
1294 case '0': case '1': case '2': case '3': case '4':
1295 case '5': case '6': case '7': case '8': case '9':
1296 {
1297 int prev_dot;
46d07497 1298
041c3194
ZW
1299 cur--; /* Backup character. */
1300 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1301 if (prev_dot)
1302 cur_token--;
bfb9dc7f 1303 INIT_TOKEN_STR (list, cur_token);
041c3194
ZW
1304 /* Prepend an immediately previous CPP_DOT token. */
1305 if (prev_dot)
1306 {
1307 if (list->name_cap == list->name_used)
1308 auto_expand_name_space (list);
46d07497 1309
bfb9dc7f 1310 cur_token->val.str.len = 1;
041c3194
ZW
1311 list->namebuf[list->name_used++] = '.';
1312 }
46d07497 1313
041c3194
ZW
1314 continue_number:
1315 cur_token->type = CPP_NUMBER; /* Before parse_number. */
1316 buffer->cur = cur;
bfb9dc7f 1317 parse_number (pfile, list, &cur_token->val.str);
041c3194
ZW
1318 cur = buffer->cur;
1319 }
1320 /* Check for # 123 form of #line. */
1321 if (MIGHT_BE_DIRECTIVE ())
1322 list->directive = _cpp_check_linemarker (pfile, cur_token,
1323 !(cur_token[-1].flags
1324 & PREV_WHITE));
1325 cur_token++;
1326 break;
46d07497 1327
041c3194
ZW
1328 letter:
1329 case '_':
1330 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1331 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1332 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1333 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1334 case 'y': case 'z':
1335 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1336 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1337 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1338 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1339 case 'Y': case 'Z':
1340 cur--; /* Backup character. */
bfb9dc7f 1341 cur_token->val.node = 0;
041c3194
ZW
1342 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
1343
1344 continue_name:
bfb9dc7f 1345 cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
45b966db 1346
041c3194
ZW
1347 if (MIGHT_BE_DIRECTIVE ())
1348 list->directive = _cpp_check_directive (pfile, cur_token,
1349 !(list->tokens[0].flags
1350 & PREV_WHITE));
1351 cur_token++;
1352 break;
f8f769ea 1353
041c3194
ZW
1354 case '\'':
1355 /* Character constants are not recognized when processing Fortran,
1356 or if -traditional. */
1357 if (CPP_OPTION (pfile, lang_fortran) || CPP_TRADITIONAL (pfile))
1358 goto other;
45b966db 1359
041c3194
ZW
1360 /* Fall through. */
1361 case '\"':
1362 /* Traditionally, escaped strings are not strings. */
1363 if (CPP_TRADITIONAL (pfile) && IMMED_TOKEN ()
1364 && PREV_TOKEN_TYPE == CPP_BACKSLASH)
1365 goto other;
04e3ec78 1366
041c3194
ZW
1367 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1368 /* Do we have a wide string? */
1369 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
bfb9dc7f 1370 && cur_token[-1].val.node == pfile->spec_nodes->n_L
041c3194 1371 && !CPP_TRADITIONAL (pfile))
04e3ec78 1372 {
041c3194 1373 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
04e3ec78 1374 }
45b966db 1375
041c3194
ZW
1376 do_parse_string:
1377 /* Here c is one of ' " or >. */
bfb9dc7f 1378 INIT_TOKEN_STR (list, cur_token);
041c3194
ZW
1379 buffer->cur = cur;
1380 parse_string (pfile, list, cur_token, c);
1381 cur = buffer->cur;
1382 cur_token++;
1383 break;
45b966db 1384
041c3194
ZW
1385 case '/':
1386 cur_token->type = CPP_DIV;
1387 if (IMMED_TOKEN ())
1388 {
1389 if (PREV_TOKEN_TYPE == CPP_DIV)
1390 {
1391 /* We silently allow C++ comments in system headers,
1392 irrespective of conformance mode, because lots of
1393 broken systems do that and trying to clean it up
1394 in fixincludes is a nightmare. */
1395 if (CPP_IN_SYSTEM_HEADER (pfile))
1396 goto do_line_comment;
1397 else if (CPP_OPTION (pfile, cplusplus_comments))
1398 {
1399 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1400 && ! buffer->warned_cplusplus_comments)
1401 {
1402 buffer->cur = cur;
1403 cpp_pedwarn (pfile,
1404 "C++ style comments are not allowed in ISO C89");
1405 cpp_pedwarn (pfile,
1406 "(this will be reported only once per input file)");
1407 buffer->warned_cplusplus_comments = 1;
1408 }
1409 do_line_comment:
1410 buffer->cur = cur;
1411#if 0 /* Leave until new lexer in place. */
1412 if (cur[-2] != c)
1413 cpp_warning (pfile,
1414 "comment start split across lines");
1415#endif
1416 if (skip_line_comment (pfile))
1417 cpp_warning (pfile, "multi-line comment");
f8f769ea 1418
041c3194
ZW
1419 /* Back-up to first '-' or '/'. */
1420 cur_token--;
1421 if (!CPP_OPTION (pfile, discard_comments)
91fcd158 1422 && (!KNOWN_DIRECTIVE()
041c3194
ZW
1423 || (list->directive->flags & COMMENTS)))
1424 save_comment (list, cur_token++, cur,
1425 buffer->cur - cur, c);
1426 else if (!CPP_OPTION (pfile, traditional))
1427 flags = PREV_WHITE;
1428
1429 cur = buffer->cur;
1430 break;
1431 }
1432 }
1433 }
1434 cur_token++;
1435 break;
1436
1437 case '*':
1438 cur_token->type = CPP_MULT;
1439 if (IMMED_TOKEN ())
45b966db 1440 {
041c3194
ZW
1441 if (PREV_TOKEN_TYPE == CPP_DIV)
1442 {
1443 buffer->cur = cur;
1444#if 0 /* Leave until new lexer in place. */
1445 if (cur[-2] != '/')
1446 cpp_warning (pfile,
1447 "comment start '/*' split across lines");
1448#endif
1449 if (skip_block_comment (pfile))
1450 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1451 "unterminated comment");
1452#if 0 /* Leave until new lexer in place. */
1453 else if (buffer->cur[-2] != '*')
1454 cpp_warning (pfile,
1455 "comment end '*/' split across lines");
1456#endif
1457 /* Back up to opening '/'. */
1458 cur_token--;
1459 if (!CPP_OPTION (pfile, discard_comments)
91fcd158 1460 && (!KNOWN_DIRECTIVE()
041c3194
ZW
1461 || (list->directive->flags & COMMENTS)))
1462 save_comment (list, cur_token++, cur,
1463 buffer->cur - cur, c);
1464 else if (!CPP_OPTION (pfile, traditional))
1465 flags = PREV_WHITE;
1466
1467 cur = buffer->cur;
1468 break;
1469 }
1470 else if (CPP_OPTION (pfile, cplusplus))
1471 {
1472 /* In C++, there are .* and ->* operators. */
1473 if (PREV_TOKEN_TYPE == CPP_DEREF)
1474 BACKUP_TOKEN (CPP_DEREF_STAR);
1475 else if (PREV_TOKEN_TYPE == CPP_DOT)
1476 BACKUP_TOKEN (CPP_DOT_STAR);
1477 }
45b966db 1478 }
041c3194 1479 cur_token++;
f8f769ea 1480 break;
45b966db 1481
041c3194
ZW
1482 case '\n':
1483 case '\r':
1484 handle_newline (cur, buffer->rlimit, c);
1485 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
45b966db 1486 {
041c3194 1487 if (IMMED_TOKEN ())
45b966db 1488 {
041c3194
ZW
1489 /* Remove the escaped newline. Then continue to process
1490 any interrupted name or number. */
1491 cur_token--;
1492 /* Backslash-newline may not be immediately followed by
1493 EOF (C99 5.1.1.2). */
1494 if (cur >= buffer->rlimit)
1495 {
1496 cpp_pedwarn (pfile, "backslash-newline at end of file");
1497 break;
1498 }
1499 if (IMMED_TOKEN ())
1500 {
1501 cur_token--;
1502 if (cur_token->type == CPP_NAME)
1503 goto continue_name;
1504 else if (cur_token->type == CPP_NUMBER)
1505 goto continue_number;
1506 cur_token++;
1507 }
1508 /* Remember whitespace setting. */
1509 flags = cur_token->flags;
f8f769ea 1510 break;
45b966db 1511 }
041c3194
ZW
1512 else
1513 {
1514 buffer->cur = cur;
1515 cpp_warning (pfile,
1516 "backslash and newline separated by space");
1517 }
1518 }
1519 else if (MIGHT_BE_DIRECTIVE ())
1520 {
1521 /* "Null directive." C99 6.10.7: A preprocessing
1522 directive of the form # <new-line> has no effect.
1523
1524 But it is still a directive, and therefore disappears
1525 from the output. */
1526 cur_token--;
1527 if (cur_token->flags & PREV_WHITE)
45b966db 1528 {
041c3194
ZW
1529 if (CPP_WTRADITIONAL (pfile))
1530 cpp_warning (pfile,
1531 "K+R C ignores #\\n with the # indented");
1532 if (CPP_TRADITIONAL (pfile))
1533 cur_token++;
45b966db 1534 }
f8f769ea 1535 }
45b966db 1536
041c3194
ZW
1537 /* Skip vertical space until we have at least one token to
1538 return. */
1539 if (cur_token != &list->tokens[first_token])
1540 goto out;
1541 list->line = CPP_BUF_LINE (buffer);
f8f769ea 1542 break;
04e3ec78 1543
041c3194
ZW
1544 case '-':
1545 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1546 {
1547 if (CPP_OPTION (pfile, chill))
1548 goto do_line_comment;
1549 REVISE_TOKEN (CPP_MINUS_MINUS);
1550 }
1551 else
1552 PUSH_TOKEN (CPP_MINUS);
1553 break;
45b966db 1554
041c3194
ZW
1555 make_hash:
1556 case '#':
1557 /* The digraph flag checking ensures that ## and %:%:
1558 are interpreted as CPP_PASTE, but #%: and %:# are not. */
1559 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1560 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1561 REVISE_TOKEN (CPP_PASTE);
1562 else
1563 PUSH_TOKEN (CPP_HASH);
1564 break;
04e3ec78 1565
041c3194
ZW
1566 case ':':
1567 cur_token->type = CPP_COLON;
1568 if (IMMED_TOKEN ())
1569 {
1570 if (PREV_TOKEN_TYPE == CPP_COLON
1571 && CPP_OPTION (pfile, cplusplus))
1572 BACKUP_TOKEN (CPP_SCOPE);
9b55f29a 1573 else if (CPP_OPTION (pfile, digraphs))
041c3194 1574 {
9b55f29a
NB
1575 /* Digraph: "<:" is a '[' */
1576 if (PREV_TOKEN_TYPE == CPP_LESS)
1577 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1578 /* Digraph: "%:" is a '#' */
1579 else if (PREV_TOKEN_TYPE == CPP_MOD)
1580 {
1581 (--cur_token)->flags |= DIGRAPH;
1582 goto make_hash;
1583 }
041c3194
ZW
1584 }
1585 }
1586 cur_token++;
1587 break;
f8f769ea 1588
041c3194
ZW
1589 case '&':
1590 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1591 REVISE_TOKEN (CPP_AND_AND);
1592 else
1593 PUSH_TOKEN (CPP_AND);
f8f769ea 1594 break;
45b966db 1595
041c3194
ZW
1596 make_or:
1597 case '|':
1598 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1599 REVISE_TOKEN (CPP_OR_OR);
1600 else
1601 PUSH_TOKEN (CPP_OR);
1602 break;
45b966db 1603
041c3194
ZW
1604 case '+':
1605 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1606 REVISE_TOKEN (CPP_PLUS_PLUS);
1607 else
1608 PUSH_TOKEN (CPP_PLUS);
1609 break;
45b966db 1610
041c3194
ZW
1611 case '=':
1612 /* This relies on equidistance of "?=" and "?" tokens. */
1613 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1614 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1615 else
1616 PUSH_TOKEN (CPP_EQ);
1617 break;
45b966db 1618
041c3194
ZW
1619 case '>':
1620 cur_token->type = CPP_GREATER;
1621 if (IMMED_TOKEN ())
1622 {
1623 if (PREV_TOKEN_TYPE == CPP_GREATER)
1624 BACKUP_TOKEN (CPP_RSHIFT);
1625 else if (PREV_TOKEN_TYPE == CPP_MINUS)
1626 BACKUP_TOKEN (CPP_DEREF);
9b55f29a
NB
1627 else if (CPP_OPTION (pfile, digraphs))
1628 {
1629 /* Digraph: ":>" is a ']' */
1630 if (PREV_TOKEN_TYPE == CPP_COLON)
1631 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1632 /* Digraph: "%>" is a '}' */
1633 else if (PREV_TOKEN_TYPE == CPP_MOD)
1634 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1635 }
041c3194
ZW
1636 }
1637 cur_token++;
1638 break;
1639
1640 case '<':
1641 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1642 {
1643 REVISE_TOKEN (CPP_LSHIFT);
1644 break;
1645 }
1646 /* Is this the beginning of a header name? */
91fcd158 1647 if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
041c3194
ZW
1648 {
1649 c = '>'; /* Terminator. */
1650 cur_token->type = CPP_HEADER_NAME;
1651 goto do_parse_string;
1652 }
1653 PUSH_TOKEN (CPP_LESS);
1654 break;
45b966db 1655
041c3194
ZW
1656 case '%':
1657 /* Digraph: "<%" is a '{' */
1658 cur_token->type = CPP_MOD;
9b55f29a
NB
1659 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1660 && CPP_OPTION (pfile, digraphs))
041c3194
ZW
1661 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1662 cur_token++;
1663 break;
04e3ec78 1664
041c3194
ZW
1665 case '?':
1666 if (cur + 1 < buffer->rlimit && *cur == '?'
1667 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1668 {
1669 /* Handle trigraph. */
1670 cur++;
1671 switch (*cur++)
1672 {
1673 case '(': goto make_open_square;
1674 case ')': goto make_close_square;
1675 case '<': goto make_open_brace;
1676 case '>': goto make_close_brace;
1677 case '=': goto make_hash;
1678 case '!': goto make_or;
1679 case '-': goto make_complement;
1680 case '/': goto make_backslash;
1681 case '\'': goto make_xor;
1682 }
1683 }
1684 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1685 {
1686 /* GNU C++ defines <? and >? operators. */
1687 if (PREV_TOKEN_TYPE == CPP_LESS)
1688 {
1689 REVISE_TOKEN (CPP_MIN);
1690 break;
1691 }
1692 else if (PREV_TOKEN_TYPE == CPP_GREATER)
1693 {
1694 REVISE_TOKEN (CPP_MAX);
1695 break;
1696 }
1697 }
1698 PUSH_TOKEN (CPP_QUERY);
1699 break;
45b966db 1700
041c3194
ZW
1701 case '.':
1702 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1703 && IMMED_TOKEN ()
1704 && !(cur_token[-1].flags & PREV_WHITE))
1705 {
1706 cur_token -= 2;
1707 PUSH_TOKEN (CPP_ELLIPSIS);
1708 }
1709 else
1710 PUSH_TOKEN (CPP_DOT);
1711 break;
c5a04734 1712
041c3194
ZW
1713 make_complement:
1714 case '~': PUSH_TOKEN (CPP_COMPL); break;
1715 make_xor:
1716 case '^': PUSH_TOKEN (CPP_XOR); break;
1717 make_open_brace:
1718 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1719 make_close_brace:
1720 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1721 make_open_square:
1722 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1723 make_close_square:
1724 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1725 make_backslash:
1726 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1727 case '!': PUSH_TOKEN (CPP_NOT); break;
1728 case ',': PUSH_TOKEN (CPP_COMMA); break;
1729 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1730 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1731 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
6d2c2047 1732
041c3194
ZW
1733 case '$':
1734 if (CPP_OPTION (pfile, dollars_in_ident))
1735 goto letter;
1736 /* Fall through */
1737 other:
1738 default:
1739 cur_token->val.aux = c;
1740 PUSH_TOKEN (CPP_OTHER);
1741 break;
1742 }
1743 }
6d2c2047 1744
041c3194
ZW
1745 /* Run out of token space? */
1746 if (cur_token == token_limit)
1747 {
1748 list->tokens_used = cur_token - list->tokens;
1749 _cpp_expand_token_space (list, 256);
1750 goto expanded;
1751 }
6d2c2047 1752
041c3194
ZW
1753 cur_token->flags = flags;
1754 if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1755 {
91fcd158 1756 if (cur > buffer->buf && !is_vspace (cur[-1]))
041c3194
ZW
1757 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1758 CPP_BUF_COLUMN (buffer, cur),
1759 "no newline at end of file");
1760 cur_token++->type = CPP_EOF;
1761 }
6d2c2047 1762
041c3194
ZW
1763 out:
1764 /* All tokens are allocated, so the memory location is fixed. */
1765 first = &list->tokens[first_token];
1766
1767 /* Don't complain about the null directive, nor directives in
1768 assembly source: we don't know where the comments are, and # may
1769 introduce assembler pseudo-ops. Don't complain about invalid
1770 directives in skipped conditional groups (6.10 p4). */
1771 if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1772 && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1773 {
1774 if (first[1].type == CPP_NAME)
1775 cpp_error (pfile, "invalid preprocessing directive #%.*s",
bfb9dc7f 1776 (int) first[1].val.node->length, first[1].val.node->name);
041c3194
ZW
1777 else
1778 cpp_error (pfile, "invalid preprocessing directive");
1779 }
1780
91fcd158
NB
1781 /* Put EOF at end of known directives. This covers "directives do
1782 not extend beyond the end of the line (description 6.10 part 2)". */
1783 if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
041c3194
ZW
1784 {
1785 pfile->first_directive_token = first;
1786 cur_token++->type = CPP_EOF;
1787 }
1788
91fcd158
NB
1789 /* Directives, known or not, always start a new line. */
1790 if (first_token == 0 || list->tokens[first_token].type == CPP_HASH)
041c3194 1791 first->flags |= BOL;
6d2c2047 1792 else
041c3194
ZW
1793 /* 6.10.3.10: Within the sequence of preprocessing tokens making
1794 up the invocation of a function-like macro, new line is
1795 considered a normal white-space character. */
1796 first->flags |= PREV_WHITE;
1797
1798 buffer->cur = cur;
1799 list->tokens_used = cur_token - list->tokens;
1800 pfile->in_lex_line = 0;
6d2c2047
ZW
1801}
1802
041c3194
ZW
1803/* Write the spelling of a token TOKEN, with any appropriate
1804 whitespace before it, to the token_buffer. PREV is the previous
1805 token, which is used to determine if we need to shove in an extra
1806 space in order to avoid accidental token paste. */
1807static void
1808output_token (pfile, token, prev)
1809 cpp_reader *pfile;
1810 const cpp_token *token, *prev;
1811{
1812 int dummy;
c5a04734 1813
041c3194
ZW
1814 if (token->col && (token->flags & BOL))
1815 {
1816 /* Supply enough whitespace to put this token in its original
1817 column. Don't bother trying to reconstruct tabs; we can't
1818 get it right in general, and nothing ought to care. (Yes,
1819 some things do care; the fault lies with them.) */
1820 unsigned char *buffer;
1821 unsigned int spaces = token->col - 1;
1822
1823 CPP_RESERVE (pfile, token->col);
1824 buffer = pfile->limit;
1825
1826 while (spaces--)
1827 *buffer++ = ' ';
1828 pfile->limit = buffer;
1829 }
1830 else if (token->flags & PREV_WHITE)
1831 CPP_PUTC (pfile, ' ');
1832 /* Check for and prevent accidental token pasting, in ANSI mode. */
d6d5f795 1833
041c3194
ZW
1834 else if (!CPP_TRADITIONAL (pfile) && prev)
1835 {
1836 if (can_paste (pfile, prev, token, &dummy) != CPP_EOF)
1837 CPP_PUTC (pfile, ' ');
1838 /* can_paste catches most of the accidental paste cases, but not all.
1839 Consider a + ++b - if there is not a space between the + and ++, it
1840 will be misparsed as a++ + b. */
1841 else if ((prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1842 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS))
1843 CPP_PUTC (pfile, ' ');
1844 }
d6d5f795 1845
041c3194
ZW
1846 CPP_RESERVE (pfile, TOKEN_LEN (token));
1847 pfile->limit = spell_token (pfile, token, pfile->limit);
1848}
d6d5f795 1849
041c3194 1850/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1851 already contain the enough space to hold the token's spelling.
1852 Returns a pointer to the character after the last character
1853 written. */
d6d5f795 1854
041c3194
ZW
1855static unsigned char *
1856spell_token (pfile, token, buffer)
1857 cpp_reader *pfile; /* Would be nice to be rid of this... */
1858 const cpp_token *token;
1859 unsigned char *buffer;
1860{
1861 switch (token_spellings[token->type].type)
1862 {
1863 case SPELL_OPERATOR:
1864 {
1865 const unsigned char *spelling;
1866 unsigned char c;
d6d5f795 1867
041c3194
ZW
1868 if (token->flags & DIGRAPH)
1869 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1870 else
1871 spelling = token_spellings[token->type].spelling;
1872
1873 while ((c = *spelling++) != '\0')
1874 *buffer++ = c;
1875 }
1876 break;
d6d5f795 1877
041c3194 1878 case SPELL_IDENT:
bfb9dc7f
ZW
1879 memcpy (buffer, token->val.node->name, token->val.node->length);
1880 buffer += token->val.node->length;
041c3194 1881 break;
d6d5f795 1882
041c3194
ZW
1883 case SPELL_STRING:
1884 {
041c3194
ZW
1885 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1886 *buffer++ = 'L';
bfb9dc7f 1887
041c3194 1888 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
bfb9dc7f
ZW
1889 *buffer++ = '"';
1890 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1891 *buffer++ = '\'';
1892
1893 memcpy (buffer, token->val.str.text, token->val.str.len);
1894 buffer += token->val.str.len;
1895
1896 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1897 *buffer++ = '"';
1898 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1899 *buffer++ = '\'';
041c3194
ZW
1900 }
1901 break;
d6d5f795 1902
041c3194
ZW
1903 case SPELL_CHAR:
1904 *buffer++ = token->val.aux;
1905 break;
d6d5f795 1906
041c3194
ZW
1907 case SPELL_NONE:
1908 cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
1909 break;
1910 }
d6d5f795 1911
041c3194
ZW
1912 return buffer;
1913}
d6d5f795 1914
cf00a885
ZW
1915/* Return the spelling of a token known to be an operator.
1916 Does not distinguish digraphs from their counterparts. */
1917const unsigned char *
1918_cpp_spell_operator (type)
1919 enum cpp_ttype type;
1920{
1921 if (token_spellings[type].type == SPELL_OPERATOR)
1922 return token_spellings[type].spelling;
1923 else
1924 return token_names[type];
1925}
1926
1927
041c3194 1928/* Macro expansion algorithm. TODO. */
d6d5f795 1929
7de9cc38
KG
1930static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
1931static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
c5a04734 1932
041c3194
ZW
1933#define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
1934#define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
1935
1936/* Flags for cpp_context. */
1937#define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
1938#define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
1939#define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
1940#define CONTEXT_ARG (1 << 3) /* If an argument context. */
1941
1942#define ASSIGN_FLAGS_AND_POS(d, s) \
1943 do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
1944 if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
1945 } while (0)
1946
1947/* f is flags, just consisting of PREV_WHITE | BOL. */
1948#define MODIFY_FLAGS_AND_POS(d, s, f) \
1949 do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
1950 if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
1951 } while (0)
1952
1953typedef struct cpp_context cpp_context;
1954struct cpp_context
1955{
1956 union
1957 {
1958 const cpp_toklist *list; /* Used for macro contexts only. */
1959 const cpp_token **arg; /* Used for arg contexts only. */
1960 } u;
1961
417f3e3a 1962 /* Pushed token to be returned by next call to get_raw_token. */
041c3194
ZW
1963 const cpp_token *pushed_token;
1964
1965 struct macro_args *args; /* 0 for arguments and object-like macros. */
1966 unsigned short posn; /* Current posn, index into u. */
1967 unsigned short count; /* No. of tokens in u. */
1968 unsigned short level;
1969 unsigned char flags;
1970};
1971
1972typedef struct macro_args macro_args;
1973struct macro_args
1974{
1975 unsigned int *ends;
1976 const cpp_token **tokens;
1977 unsigned int capacity;
1978 unsigned int used;
1979 unsigned short level;
1980};
1981
1982static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
1983static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
1984 macro_args *, unsigned int *));
1985static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
1986static void save_token PARAMS ((macro_args *, const cpp_token *));
417f3e3a
ZW
1987static int pop_context PARAMS ((cpp_reader *));
1988static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
1989static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
041c3194
ZW
1990static void free_macro_args PARAMS ((macro_args *));
1991
1992/* Free the storage allocated for macro arguments. */
1993static void
1994free_macro_args (args)
1995 macro_args *args;
c5a04734 1996{
041c3194 1997 if (args->tokens)
417f3e3a 1998 free ((PTR) args->tokens);
041c3194
ZW
1999 free (args->ends);
2000 free (args);
c5a04734
ZW
2001}
2002
041c3194
ZW
2003/* Determines if a macro has been already used (and is therefore
2004 disabled). */
c5a04734 2005static int
041c3194 2006is_macro_disabled (pfile, expansion, token)
c5a04734 2007 cpp_reader *pfile;
041c3194
ZW
2008 const cpp_toklist *expansion;
2009 const cpp_token *token;
c5a04734 2010{
041c3194
ZW
2011 cpp_context *context = CURRENT_CONTEXT (pfile);
2012
cf00a885
ZW
2013 /* Don't expand anything if this file has already been preprocessed. */
2014 if (CPP_OPTION (pfile, preprocessed))
2015 return 1;
2016
041c3194
ZW
2017 /* Arguments on either side of ## are inserted in place without
2018 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
2019 occurs during a later rescan pass. The effect is that we expand
2020 iff we would as part of the macro's expansion list, so we should
2021 drop to the macro's context. */
2022 if (IS_ARG_CONTEXT (context))
c5a04734 2023 {
041c3194
ZW
2024 if (token->flags & PASTED)
2025 context--;
2026 else if (!(context->flags & CONTEXT_RAW))
2027 return 1;
2028 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2029 context--;
c5a04734 2030 }
c5a04734 2031
041c3194
ZW
2032 /* Have we already used this macro? */
2033 while (context->level > 0)
c5a04734 2034 {
041c3194
ZW
2035 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2036 return 1;
2037 /* Raw argument tokens are judged based on the token list they
2038 came from. */
2039 if (context->flags & CONTEXT_RAW)
2040 context = pfile->contexts + context->level;
2041 else
2042 context--;
2043 }
c5a04734 2044
041c3194
ZW
2045 /* Function-like macros may be disabled if the '(' is not in the
2046 current context. We check this without disrupting the context
2047 stack. */
2048 if (expansion->paramc >= 0)
2049 {
2050 const cpp_token *next;
2051 unsigned int prev_nme;
c5a04734 2052
041c3194
ZW
2053 context = CURRENT_CONTEXT (pfile);
2054 /* Drop down any contexts we're at the end of: the '(' may
2055 appear in lower macro expansions, or in the rest of the file. */
2056 while (context->posn == context->count && context > pfile->contexts)
2057 {
2058 context--;
2059 /* If we matched, we are disabled, as we appear in the
2060 expansion of each macro we meet. */
2061 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2062 return 1;
2063 }
c5a04734 2064
041c3194
ZW
2065 prev_nme = pfile->no_expand_level;
2066 pfile->no_expand_level = context - pfile->contexts;
417f3e3a 2067 next = _cpp_get_token (pfile);
041c3194
ZW
2068 restore_macro_expansion (pfile, prev_nme);
2069 if (next->type != CPP_OPEN_PAREN)
2070 {
2071 _cpp_push_token (pfile, next);
2072 if (CPP_OPTION (pfile, warn_traditional))
2073 cpp_warning (pfile,
2074 "function macro %.*s must be used with arguments in traditional C",
bfb9dc7f 2075 (int) token->val.node->length, token->val.node->name);
041c3194
ZW
2076 return 1;
2077 }
c5a04734 2078 }
c5a04734 2079
041c3194 2080 return 0;
c5a04734
ZW
2081}
2082
041c3194
ZW
2083/* Add a token to the set of tokens forming the arguments to the macro
2084 being parsed in parse_args. */
2085static void
2086save_token (args, token)
2087 macro_args *args;
2088 const cpp_token *token;
c5a04734 2089{
041c3194
ZW
2090 if (args->used == args->capacity)
2091 {
2092 args->capacity += args->capacity + 100;
2093 args->tokens = (const cpp_token **)
417f3e3a
ZW
2094 xrealloc ((PTR) args->tokens,
2095 args->capacity * sizeof (const cpp_token *));
041c3194
ZW
2096 }
2097 args->tokens[args->used++] = token;
c5a04734
ZW
2098}
2099
041c3194
ZW
2100/* Take and save raw tokens until we finish one argument. Empty
2101 arguments are saved as a single CPP_PLACEMARKER token. */
2102static const cpp_token *
2103parse_arg (pfile, var_args, paren_context, args, pcount)
c5a04734 2104 cpp_reader *pfile;
041c3194
ZW
2105 int var_args;
2106 unsigned int paren_context;
2107 macro_args *args;
2108 unsigned int *pcount;
c5a04734 2109{
041c3194
ZW
2110 const cpp_token *token;
2111 unsigned int paren = 0, count = 0;
2112 int raw, was_raw = 1;
c5a04734 2113
041c3194 2114 for (count = 0;; count++)
c5a04734 2115 {
417f3e3a 2116 token = _cpp_get_token (pfile);
c5a04734 2117
041c3194 2118 switch (token->type)
c5a04734 2119 {
041c3194
ZW
2120 default:
2121 break;
c5a04734 2122
041c3194
ZW
2123 case CPP_OPEN_PAREN:
2124 paren++;
2125 break;
2126
2127 case CPP_CLOSE_PAREN:
2128 if (paren-- != 0)
2129 break;
2130 goto out;
2131
2132 case CPP_COMMA:
2133 /* Commas are not terminators within parantheses or var_args. */
2134 if (paren || var_args)
2135 break;
2136 goto out;
2137
2138 case CPP_EOF: /* Error reported by caller. */
2139 goto out;
c5a04734 2140 }
c5a04734 2141
041c3194
ZW
2142 raw = pfile->cur_context <= paren_context;
2143 if (raw != was_raw)
2144 {
2145 was_raw = raw;
2146 save_token (args, 0);
2147 count++;
c5a04734 2148 }
041c3194 2149 save_token (args, token);
c5a04734 2150 }
c5a04734
ZW
2151
2152 out:
041c3194
ZW
2153 if (count == 0)
2154 {
2155 /* Duplicate the placemarker. Then we can set its flags and
2156 position and safely be using more than one. */
2157 save_token (args, duplicate_token (pfile, &placemarker_token));
2158 count++;
2159 }
2160
2161 *pcount = count;
2162 return token;
c5a04734
ZW
2163}
2164
041c3194
ZW
/* True if the argument starting at offset O of argument list A is
   empty: its first token is a CPP_PLACEMARKER, or its first entry is
   a null pointer and the token after that is a CPP_PLACEMARKER.  Both
   A and O are evaluated more than once.  */

#define empty_argument(A, O) \
  ((A)->tokens[(O) + ((A)->tokens[O] == 0)]->type == CPP_PLACEMARKER)
2172
2173/* Parse the arguments making up a macro invocation. Nested arguments
2174 are automatically macro expanded, but immediate macros are not
2175 expanded; this enables e.g. operator # to work correctly. Returns
2176 non-zero on error. */
c5a04734 2177static int
041c3194 2178parse_args (pfile, hp, args)
c5a04734 2179 cpp_reader *pfile;
041c3194
ZW
2180 cpp_hashnode *hp;
2181 macro_args *args;
c5a04734 2182{
041c3194
ZW
2183 const cpp_token *token;
2184 const cpp_toklist *macro;
2185 unsigned int total = 0;
2186 unsigned int paren_context = pfile->cur_context;
2187 int argc = 0;
2188
2189 macro = hp->value.expansion;
2190 do
c5a04734 2191 {
041c3194 2192 unsigned int count;
c5a04734 2193
041c3194
ZW
2194 token = parse_arg (pfile, (argc + 1 == macro->paramc
2195 && (macro->flags & VAR_ARGS)),
2196 paren_context, args, &count);
2197 if (argc < macro->paramc)
c5a04734 2198 {
041c3194
ZW
2199 total += count;
2200 args->ends[argc] = total;
c5a04734 2201 }
041c3194 2202 argc++;
c5a04734 2203 }
041c3194 2204 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
c5a04734 2205
041c3194
ZW
2206 if (token->type == CPP_EOF)
2207 {
2208 cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
2209 hp->length, hp->name);
2210 return 1;
2211 }
2212 else if (argc < macro->paramc)
2213 {
2214 /* A rest argument is allowed to not appear in the invocation at all.
2215 e.g. #define debug(format, args...) ...
2216 debug("string");
2217 This is exactly the same as if the rest argument had received no
563dd08a 2218 tokens - debug("string",); This extension is deprecated. */
041c3194 2219
563dd08a 2220 if (argc + 1 == macro->paramc && (macro->flags & GNU_REST_ARGS))
041c3194
ZW
2221 {
2222 /* Duplicate the placemarker. Then we can set its flags and
2223 position and safely be using more than one. */
2224 save_token (args, duplicate_token (pfile, &placemarker_token));
2225 args->ends[argc] = total + 1;
2226 return 0;
2227 }
2228 else
2229 {
2230 cpp_error (pfile,
2231 "insufficient arguments in invocation of macro \"%.*s\"",
2232 hp->length, hp->name);
2233 return 1;
2234 }
2235 }
2236 /* An empty argument to an empty function-like macro is fine. */
2237 else if (argc > macro->paramc
2238 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2239 {
2240 cpp_error (pfile,
2241 "too many arguments in invocation of macro \"%.*s\"",
2242 hp->length, hp->name);
2243 return 1;
2244 }
c5a04734 2245
041c3194
ZW
2246 return 0;
2247}
2248
2249/* Adds backslashes before all backslashes and double quotes appearing
2250 in strings. Non-printable characters are converted to octal. */
2251static U_CHAR *
2252quote_string (dest, src, len)
2253 U_CHAR *dest;
2254 const U_CHAR *src;
2255 unsigned int len;
2256{
2257 while (len--)
c5a04734 2258 {
041c3194 2259 U_CHAR c = *src++;
c5a04734 2260
041c3194 2261 if (c == '\\' || c == '"')
6ab3e7dd 2262 {
041c3194
ZW
2263 *dest++ = '\\';
2264 *dest++ = c;
2265 }
2266 else
2267 {
2268 if (ISPRINT (c))
2269 *dest++ = c;
2270 else
2271 {
2272 sprintf ((char *) dest, "\\%03o", c);
2273 dest += 4;
2274 }
6ab3e7dd 2275 }
c5a04734 2276 }
c5a04734 2277
041c3194 2278 return dest;
c5a04734
ZW
2279}
2280
041c3194
ZW
2281/* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2282 CPP_STRING token containing TEXT in quoted form. */
2283static cpp_token *
2284make_string_token (token, text, len)
2285 cpp_token *token;
2286 const U_CHAR *text;
2287 unsigned int len;
2288{
2289 U_CHAR *buf;
2290
2291 buf = (U_CHAR *) xmalloc (len * 4);
2292 token->type = CPP_STRING;
2293 token->flags = 0;
bfb9dc7f
ZW
2294 token->val.str.text = buf;
2295 token->val.str.len = quote_string (buf, text, len) - buf;
041c3194
ZW
2296 return token;
2297}
2298
2299/* Allocates and converts a temporary token to a CPP_NUMBER token,
2300 evaluating to NUMBER. */
2301static cpp_token *
2302alloc_number_token (pfile, number)
c5a04734 2303 cpp_reader *pfile;
041c3194 2304 int number;
c5a04734 2305{
041c3194
ZW
2306 cpp_token *result;
2307 char *buf;
2308
2309 result = get_temp_token (pfile);
2310 buf = xmalloc (20);
2311 sprintf (buf, "%d", number);
2312
2313 result->type = CPP_NUMBER;
2314 result->flags = 0;
bfb9dc7f
ZW
2315 result->val.str.text = (U_CHAR *) buf;
2316 result->val.str.len = strlen (buf);
041c3194
ZW
2317 return result;
2318}
c5a04734 2319
041c3194
ZW
2320/* Returns a temporary token from the temporary token store of PFILE. */
2321static cpp_token *
2322get_temp_token (pfile)
2323 cpp_reader *pfile;
2324{
2325 if (pfile->temp_used == pfile->temp_alloced)
2326 {
2327 if (pfile->temp_used == pfile->temp_cap)
2328 {
2329 pfile->temp_cap += pfile->temp_cap + 20;
2330 pfile->temp_tokens = (cpp_token **) xrealloc
2331 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2332 }
2333 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2334 (sizeof (cpp_token));
2335 }
c5a04734 2336
041c3194
ZW
2337 return pfile->temp_tokens[pfile->temp_used++];
2338}
2339
2340/* Release (not free) for re-use the temporary tokens of PFILE. */
2341static void
2342release_temp_tokens (pfile)
2343 cpp_reader *pfile;
2344{
2345 while (pfile->temp_used)
c5a04734 2346 {
041c3194 2347 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
c5a04734 2348
bfb9dc7f 2349 if (token_spellings[token->type].type == SPELL_STRING)
c5a04734 2350 {
bfb9dc7f
ZW
2351 free ((char *) token->val.str.text);
2352 token->val.str.text = 0;
c5a04734
ZW
2353 }
2354 }
041c3194 2355}
c5a04734 2356
041c3194
ZW
2357/* Free all of PFILE's dynamically-allocated temporary tokens. */
2358void
2359_cpp_free_temp_tokens (pfile)
2360 cpp_reader *pfile;
2361{
2362 if (pfile->temp_tokens)
c5a04734 2363 {
041c3194
ZW
2364 /* It is possible, though unlikely (looking for '(' of a funlike
2365 macro into EOF), that we haven't released the tokens yet. */
2366 release_temp_tokens (pfile);
2367 while (pfile->temp_alloced)
2368 free (pfile->temp_tokens[--pfile->temp_alloced]);
2369 free (pfile->temp_tokens);
c5a04734
ZW
2370 }
2371
041c3194
ZW
2372 if (pfile->date)
2373 {
bfb9dc7f 2374 free ((char *) pfile->date->val.str.text);
041c3194 2375 free (pfile->date);
bfb9dc7f 2376 free ((char *) pfile->time->val.str.text);
041c3194
ZW
2377 free (pfile->time);
2378 }
c5a04734
ZW
2379}
2380
041c3194
ZW
2381/* Copy TOKEN into a temporary token from PFILE's store. */
2382static cpp_token *
2383duplicate_token (pfile, token)
2384 cpp_reader *pfile;
2385 const cpp_token *token;
2386{
2387 cpp_token *result = get_temp_token (pfile);
c5a04734 2388
041c3194 2389 *result = *token;
bfb9dc7f 2390 if (token_spellings[token->type].type == SPELL_STRING)
041c3194 2391 {
bfb9dc7f
ZW
2392 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2393 memcpy (buff, token->val.str.text, token->val.str.len);
2394 result->val.str.text = buff;
041c3194
ZW
2395 }
2396 return result;
2397}
c5a04734 2398
041c3194
ZW
2399/* Determine whether two tokens can be pasted together, and if so,
2400 what the resulting token is. Returns CPP_EOF if the tokens cannot
2401 be pasted, or the appropriate type for the merged token if they
2402 can. */
2403static enum cpp_ttype
2404can_paste (pfile, token1, token2, digraph)
2405 cpp_reader * pfile;
2406 const cpp_token *token1, *token2;
2407 int* digraph;
c5a04734 2408{
041c3194
ZW
2409 enum cpp_ttype a = token1->type, b = token2->type;
2410 int cxx = CPP_OPTION (pfile, cplusplus);
c5a04734 2411
041c3194
ZW
2412 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2413 return a + (CPP_EQ_EQ - CPP_EQ);
c5a04734 2414
041c3194 2415 switch (a)
c5a04734 2416 {
041c3194
ZW
2417 case CPP_GREATER:
2418 if (b == a) return CPP_RSHIFT;
2419 if (b == CPP_QUERY && cxx) return CPP_MAX;
2420 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
2421 break;
2422 case CPP_LESS:
2423 if (b == a) return CPP_LSHIFT;
2424 if (b == CPP_QUERY && cxx) return CPP_MIN;
2425 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
9b55f29a
NB
2426 if (CPP_OPTION (pfile, digraphs))
2427 {
2428 if (b == CPP_COLON)
2429 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2430 if (b == CPP_MOD)
2431 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
2432 }
041c3194 2433 break;
c5a04734 2434
041c3194
ZW
2435 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2436 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2437 case CPP_OR: if (b == a) return CPP_OR_OR; break;
c5a04734 2438
041c3194
ZW
2439 case CPP_MINUS:
2440 if (b == a) return CPP_MINUS_MINUS;
2441 if (b == CPP_GREATER) return CPP_DEREF;
2442 break;
2443 case CPP_COLON:
2444 if (b == a && cxx) return CPP_SCOPE;
9b55f29a 2445 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
041c3194
ZW
2446 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2447 break;
2448
2449 case CPP_MOD:
9b55f29a
NB
2450 if (CPP_OPTION (pfile, digraphs))
2451 {
2452 if (b == CPP_GREATER)
2453 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2454 if (b == CPP_COLON)
2455 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2456 }
041c3194
ZW
2457 break;
2458 case CPP_DEREF:
2459 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2460 break;
2461 case CPP_DOT:
2462 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2463 if (b == CPP_NUMBER) return CPP_NUMBER;
2464 break;
2465
2466 case CPP_HASH:
2467 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2468 /* %:%: digraph */
2469 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2470 break;
2471
2472 case CPP_NAME:
2473 if (b == CPP_NAME) return CPP_NAME;
2474 if (b == CPP_NUMBER
bfb9dc7f 2475 && is_numstart(token2->val.str.text[0])) return CPP_NAME;
041c3194 2476 if (b == CPP_CHAR
bfb9dc7f 2477 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
041c3194 2478 if (b == CPP_STRING
bfb9dc7f 2479 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
041c3194
ZW
2480 break;
2481
2482 case CPP_NUMBER:
2483 if (b == CPP_NUMBER) return CPP_NUMBER;
2484 if (b == CPP_NAME) return CPP_NUMBER;
2485 if (b == CPP_DOT) return CPP_NUMBER;
2486 /* Numbers cannot have length zero, so this is safe. */
2487 if ((b == CPP_PLUS || b == CPP_MINUS)
bfb9dc7f 2488 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
041c3194
ZW
2489 return CPP_NUMBER;
2490 break;
2491
2492 default:
2493 break;
c5a04734
ZW
2494 }
2495
041c3194
ZW
2496 return CPP_EOF;
2497}
2498
2499/* Check if TOKEN is to be ##-pasted with the token after it. */
2500static const cpp_token *
2501maybe_paste_with_next (pfile, token)
2502 cpp_reader *pfile;
2503 const cpp_token *token;
2504{
2505 cpp_token *pasted;
2506 const cpp_token *second;
2507 cpp_context *context = CURRENT_CONTEXT (pfile);
2508
2509 /* Is this token on the LHS of ## ? */
041c3194 2510
417f3e3a
ZW
2511 while ((token->flags & PASTE_LEFT)
2512 || ((context->flags & CONTEXT_PASTEL)
2513 && context->posn == context->count))
c5a04734 2514 {
417f3e3a
ZW
2515 /* Suppress macro expansion for next token, but don't conflict
2516 with the other method of suppression. If it is an argument,
2517 macro expansion within the argument will still occur. */
2518 pfile->paste_level = pfile->cur_context;
2519 second = _cpp_get_token (pfile);
2520 pfile->paste_level = 0;
2521
2522 /* Ignore placemarker argument tokens (cannot be from an empty
2523 macro since macros are not expanded). */
2524 if (token->type == CPP_PLACEMARKER)
2525 pasted = duplicate_token (pfile, second);
2526 else if (second->type == CPP_PLACEMARKER)
041c3194 2527 {
417f3e3a
ZW
2528 cpp_context *mac_context = CURRENT_CONTEXT (pfile) - 1;
2529 /* GCC has special extended semantics for a ## b where b is
2530 a varargs parameter: a disappears if b consists of no
2531 tokens. This extension is deprecated. */
2532 if ((mac_context->u.list->flags & GNU_REST_ARGS)
2533 && (mac_context->u.list->tokens[mac_context->posn-1].val.aux + 1
2534 == (unsigned) mac_context->u.list->paramc))
2535 {
2536 cpp_warning (pfile, "deprecated GNU ## extension used");
2537 pasted = duplicate_token (pfile, second);
2538 }
2539 else
2540 pasted = duplicate_token (pfile, token);
041c3194
ZW
2541 }
2542 else
041c3194 2543 {
417f3e3a
ZW
2544 int digraph = 0;
2545 enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
c5a04734 2546
417f3e3a
ZW
2547 if (type == CPP_EOF)
2548 {
2549 if (CPP_OPTION (pfile, warn_paste))
2550 cpp_warning (pfile,
2551 "pasting would not give a valid preprocessing token");
2552 _cpp_push_token (pfile, second);
2553 return token;
2554 }
c5a04734 2555
417f3e3a
ZW
2556 if (type == CPP_NAME || type == CPP_NUMBER)
2557 {
2558 /* Join spellings. */
2559 U_CHAR *buf, *end;
2560
2561 pasted = get_temp_token (pfile);
2562 buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2563 end = spell_token (pfile, token, buf);
2564 end = spell_token (pfile, second, end);
2565 *end = '\0';
041c3194 2566
417f3e3a
ZW
2567 if (type == CPP_NAME)
2568 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2569 else
2570 {
2571 pasted->val.str.text = uxstrdup (buf);
2572 pasted->val.str.len = end - buf;
2573 }
2574 }
2575 else if (type == CPP_WCHAR || type == CPP_WSTRING)
2576 pasted = duplicate_token (pfile, second);
bfb9dc7f
ZW
2577 else
2578 {
417f3e3a
ZW
2579 pasted = get_temp_token (pfile);
2580 pasted->val.integer = 0;
bfb9dc7f 2581 }
417f3e3a
ZW
2582
2583 pasted->type = type;
2584 pasted->flags = digraph ? DIGRAPH : 0;
041c3194
ZW
2585 }
2586
417f3e3a
ZW
2587 /* The pasted token gets the whitespace flags and position of the
2588 first token, the PASTE_LEFT flag of the second token, plus the
2589 PASTED flag to indicate it is the result of a paste. However, we
2590 want to preserve the DIGRAPH flag. */
2591 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2592 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2593 | (second->flags & PASTE_LEFT) | PASTED);
2594 pasted->col = token->col;
2595 pasted->line = token->line;
2596
2597 /* See if there is another token to be pasted onto the one we just
2598 constructed. */
2599 token = pasted;
2600 context = CURRENT_CONTEXT (pfile);
2601 /* and loop */
041c3194 2602 }
417f3e3a 2603 return token;
041c3194
ZW
2604}
2605
2606/* Convert a token sequence to a single string token according to the
2607 rules of the ISO C #-operator. */
2608#define INIT_SIZE 200
2609static cpp_token *
2610stringify_arg (pfile, token)
c5a04734 2611 cpp_reader *pfile;
041c3194 2612 const cpp_token *token;
c5a04734 2613{
041c3194
ZW
2614 cpp_token *result;
2615 unsigned char *main_buf;
2616 unsigned int prev_value, backslash_count = 0;
2617 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
c5a04734 2618
417f3e3a 2619 push_arg_context (pfile, token);
041c3194
ZW
2620 prev_value = prevent_macro_expansion (pfile);
2621 main_buf = (unsigned char *) xmalloc (buf_cap);
c5a04734 2622
041c3194
ZW
2623 result = get_temp_token (pfile);
2624 ASSIGN_FLAGS_AND_POS (result, token);
2625
417f3e3a 2626 for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
c5a04734 2627 {
041c3194
ZW
2628 int escape;
2629 unsigned char *buf;
2630 unsigned int len = TOKEN_LEN (token);
c5a04734 2631
041c3194
ZW
2632 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2633 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2634 if (escape)
2635 len *= 4 + 1;
2636
2637 if (buf_used + len > buf_cap)
c5a04734 2638 {
041c3194
ZW
2639 buf_cap = buf_used + len + INIT_SIZE;
2640 main_buf = xrealloc (main_buf, buf_cap);
2641 }
c5a04734 2642
041c3194
ZW
2643 if (whitespace && (token->flags & PREV_WHITE))
2644 main_buf[buf_used++] = ' ';
c5a04734 2645
041c3194
ZW
2646 if (escape)
2647 buf = (unsigned char *) xmalloc (len);
2648 else
2649 buf = main_buf + buf_used;
2650
2651 len = spell_token (pfile, token, buf) - buf;
2652 if (escape)
2653 {
2654 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2655 free (buf);
2656 }
2657 else
2658 buf_used += len;
c5a04734 2659
041c3194
ZW
2660 whitespace = 1;
2661 if (token->type == CPP_BACKSLASH)
2662 backslash_count++;
2663 else
2664 backslash_count = 0;
2665 }
c5a04734 2666
041c3194
ZW
2667 /* Ignore the final \ of invalid string literals. */
2668 if (backslash_count & 1)
2669 {
2670 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2671 buf_used--;
2672 }
c5a04734 2673
041c3194 2674 result->type = CPP_STRING;
bfb9dc7f
ZW
2675 result->val.str.text = main_buf;
2676 result->val.str.len = buf_used;
041c3194
ZW
2677 restore_macro_expansion (pfile, prev_value);
2678 return result;
2679}
c5a04734 2680
041c3194
ZW
2681/* Allocate more room on the context stack of PFILE. */
2682static void
2683expand_context_stack (pfile)
2684 cpp_reader *pfile;
2685{
2686 pfile->context_cap += pfile->context_cap + 20;
2687 pfile->contexts = (cpp_context *)
2688 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2689}
c5a04734 2690
041c3194
ZW
2691/* Push the context of macro NODE onto the context stack. TOKEN is
2692 the CPP_NAME token invoking the macro. */
417f3e3a
ZW
2693static int
2694push_macro_context (pfile, token)
041c3194 2695 cpp_reader *pfile;
041c3194
ZW
2696 const cpp_token *token;
2697{
2698 unsigned char orig_flags;
2699 macro_args *args;
2700 cpp_context *context;
417f3e3a 2701 cpp_hashnode *node = token->val.node;
c5a04734 2702
041c3194
ZW
2703 /* Token's flags may change when parsing args containing a nested
2704 invocation of this macro. */
2705 orig_flags = token->flags & (PREV_WHITE | BOL);
2706 args = 0;
2707 if (node->value.expansion->paramc >= 0)
c5a04734 2708 {
041c3194
ZW
2709 unsigned int error, prev_nme;
2710
2711 /* Allocate room for the argument contexts, and parse them. */
2712 args = (macro_args *) xmalloc (sizeof (macro_args));
2713 args->ends = (unsigned int *)
2714 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2715 args->tokens = 0;
2716 args->capacity = 0;
2717 args->used = 0;
2718 args->level = pfile->cur_context;
2719
2720 prev_nme = prevent_macro_expansion (pfile);
2721 pfile->args = args;
2722 error = parse_args (pfile, node, args);
2723 pfile->args = 0;
2724 restore_macro_expansion (pfile, prev_nme);
2725 if (error)
2726 {
2727 free_macro_args (args);
417f3e3a 2728 return 1;
041c3194 2729 }
c5a04734 2730 }
c5a04734 2731
041c3194
ZW
2732 /* Now push its context. */
2733 pfile->cur_context++;
2734 if (pfile->cur_context == pfile->context_cap)
2735 expand_context_stack (pfile);
2736
2737 context = CURRENT_CONTEXT (pfile);
2738 context->u.list = node->value.expansion;
2739 context->args = args;
2740 context->posn = 0;
2741 context->count = context->u.list->tokens_used;
2742 context->level = pfile->cur_context;
2743 context->flags = 0;
2744 context->pushed_token = 0;
2745
2746 /* Set the flags of the first token. We know there must
2747 be one, empty macros are a single placemarker token. */
2748 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2749
417f3e3a 2750 return 0;
c5a04734
ZW
2751}
2752
041c3194
ZW
2753/* Push an argument to the current macro onto the context stack.
2754 TOKEN is the MACRO_ARG token representing the argument expansion. */
417f3e3a 2755static void
041c3194
ZW
2756push_arg_context (pfile, token)
2757 cpp_reader *pfile;
2758 const cpp_token *token;
c5a04734 2759{
041c3194
ZW
2760 cpp_context *context;
2761 macro_args *args;
2762
2763 pfile->cur_context++;
2764 if (pfile->cur_context == pfile->context_cap)
2765 expand_context_stack (pfile);
2766
2767 context = CURRENT_CONTEXT (pfile);
2768 args = context[-1].args;
2769
2770 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2771 context->u.arg = args->tokens + context->count;
2772 context->count = args->ends[token->val.aux] - context->count;
2773 context->args = 0;
2774 context->posn = 0;
2775 context->level = args->level;
2776 context->flags = CONTEXT_ARG | CONTEXT_RAW;
2777 context->pushed_token = 0;
2778
2779 /* Set the flags of the first token. There is one. */
2780 {
2781 const cpp_token *first = context->u.arg[0];
2782 if (!first)
2783 first = context->u.arg[1];
c5a04734 2784
041c3194
ZW
2785 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2786 token->flags & (PREV_WHITE | BOL));
2787 }
c5a04734 2788
041c3194
ZW
2789 if (token->flags & PASTE_LEFT)
2790 context->flags |= CONTEXT_PASTEL;
2791 if (pfile->paste_level)
2792 context->flags |= CONTEXT_PASTER;
c5a04734
ZW
2793}
2794
041c3194
ZW
2795/* "Unget" a token. It is effectively inserted in the token queue and
2796 will be returned by the next call to get_raw_token. */
c5a04734 2797void
041c3194 2798_cpp_push_token (pfile, token)
c5a04734 2799 cpp_reader *pfile;
041c3194 2800 const cpp_token *token;
c5a04734 2801{
041c3194
ZW
2802 cpp_context *context = CURRENT_CONTEXT (pfile);
2803 if (context->pushed_token)
2804 cpp_ice (pfile, "two tokens pushed in a row");
2805 if (token->type != CPP_EOF)
2806 context->pushed_token = token;
2807 /* Don't push back a directive's CPP_EOF, step back instead. */
2808 else if (pfile->cur_context == 0)
2809 pfile->contexts[0].posn--;
2810}
c5a04734 2811
041c3194
ZW
2812/* Handle a preprocessing directive. TOKEN is the CPP_HASH token
2813 introducing the directive. */
2814static void
2815process_directive (pfile, token)
2816 cpp_reader *pfile;
2817 const cpp_token *token;
2818{
2819 const struct directive *d = pfile->token_list.directive;
2820 int prev_nme = 0;
2821
2822 /* Skip over the directive name. */
2823 if (token[1].type == CPP_NAME)
2824 _cpp_get_raw_token (pfile);
2825 else if (token[1].type != CPP_NUMBER)
2826 cpp_ice (pfile, "directive begins with %s?!",
2827 token_names[token[1].type]);
2828
2829 /* Flush pending tokens at this point, in case the directive produces
2830 output. XXX Directive output won't be visible to a direct caller of
2831 cpp_get_token. */
2832 if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
2833 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
2834
2835 if (! (d->flags & EXPAND))
2836 prev_nme = prevent_macro_expansion (pfile);
2837 (void) (*d->handler) (pfile);
2838 if (! (d->flags & EXPAND))
2839 restore_macro_expansion (pfile, prev_nme);
2840 _cpp_skip_rest_of_line (pfile);
2841}
c5a04734 2842
041c3194
ZW
2843/* The external interface to return the next token. All macro
2844 expansion and directive processing is handled internally, the
2845 caller only ever sees the output after preprocessing. */
2846const cpp_token *
2847cpp_get_token (pfile)
2848 cpp_reader *pfile;
2849{
2850 const cpp_token *token;
417f3e3a 2851 /* Loop till we hit a non-directive, non-placemarker token. */
041c3194
ZW
2852 for (;;)
2853 {
417f3e3a
ZW
2854 token = _cpp_get_token (pfile);
2855
2856 if (token->type == CPP_PLACEMARKER)
2857 continue;
2858
2859 if (token->type == CPP_HASH && token->flags & BOL
041c3194 2860 && pfile->token_list.directive)
c5a04734 2861 {
041c3194
ZW
2862 process_directive (pfile, token);
2863 continue;
c5a04734
ZW
2864 }
2865
417f3e3a
ZW
2866 return token;
2867 }
2868}
2869
2870/* The internal interface to return the next token. There are two
2871 differences between the internal and external interfaces: the
2872 internal interface may return a PLACEMARKER token, and it does not
2873 process directives. */
2874const cpp_token *
2875_cpp_get_token (pfile)
2876 cpp_reader *pfile;
2877{
2878 const cpp_token *token;
2879 cpp_hashnode *node;
2880
2881 /* Loop until we hit a non-macro token. */
2882 for (;;)
2883 {
2884 token = get_raw_token (pfile);
2885
041c3194
ZW
2886 /* Short circuit EOF. */
2887 if (token->type == CPP_EOF)
2888 return token;
417f3e3a
ZW
2889
2890 /* If we are skipping... */
2891 if (pfile->skipping)
c5a04734 2892 {
417f3e3a
ZW
2893 /* we still have to process directives, */
2894 if (pfile->token_list.directive)
2895 return token;
2896
2897 /* but everything else is ignored. */
041c3194
ZW
2898 _cpp_skip_rest_of_line (pfile);
2899 continue;
2900 }
c5a04734 2901
417f3e3a
ZW
2902 /* If there's a potential control macro and we get here, then that
2903 #ifndef didn't cover the entire file and its argument shouldn't
2904 be taken as a control macro. */
2905 pfile->potential_control_macro = 0;
c5a04734 2906
417f3e3a
ZW
2907 /* See if there's a token to paste with this one. */
2908 if (!pfile->paste_level)
2909 token = maybe_paste_with_next (pfile, token);
c5a04734 2910
417f3e3a
ZW
2911 /* If it isn't a macro, return it now. */
2912 if (token->type != CPP_NAME
2913 || token->val.node->type == T_VOID)
2914 return token;
c5a04734 2915
417f3e3a
ZW
2916 /* Is macro expansion disabled in general? */
2917 if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
2918 return token;
041c3194 2919
417f3e3a
ZW
2920 node = token->val.node;
2921 if (node->type != T_MACRO)
2922 return special_symbol (pfile, node, token);
c5a04734 2923
041c3194
ZW
2924 if (is_macro_disabled (pfile, node->value.expansion, token))
2925 return token;
c5a04734 2926
417f3e3a
ZW
2927 if (pfile->cur_context > CPP_STACK_MAX)
2928 {
2929 cpp_error (pfile, "macros nested too deep invoking '%s'", node->name);
2930 return token;
2931 }
2932
2933 if (push_macro_context (pfile, token))
2934 return token;
2935 /* else loop */
041c3194 2936 }
041c3194 2937}
c5a04734 2938
041c3194
ZW
2939/* Returns the next raw token, i.e. without performing macro
2940 expansion. Argument contexts are automatically entered. */
2941static const cpp_token *
2942get_raw_token (pfile)
2943 cpp_reader *pfile;
2944{
2945 const cpp_token *result;
417f3e3a 2946 cpp_context *context;
c5a04734 2947
417f3e3a 2948 for (;;)
041c3194 2949 {
417f3e3a
ZW
2950 context = CURRENT_CONTEXT (pfile);
2951 if (context->pushed_token)
041c3194 2952 {
417f3e3a
ZW
2953 result = context->pushed_token;
2954 context->pushed_token = 0;
2955 }
2956 else if (context->posn == context->count)
2957 {
2958 if (pop_context (pfile))
2959 return &eof_token;
2960 continue;
2961 }
2962 else
2963 {
2964 if (IS_ARG_CONTEXT (context))
c5a04734 2965 {
041c3194 2966 result = context->u.arg[context->posn++];
417f3e3a
ZW
2967 if (result == 0)
2968 {
2969 context->flags ^= CONTEXT_RAW;
2970 result = context->u.arg[context->posn++];
2971 }
2972 return result; /* Cannot be a CPP_MACRO_ARG */
c5a04734 2973 }
417f3e3a 2974 result = &context->u.list->tokens[context->posn++];
041c3194 2975 }
c5a04734 2976
417f3e3a
ZW
2977 if (result->type != CPP_MACRO_ARG)
2978 return result;
2979
2980 if (result->flags & STRINGIFY_ARG)
2981 return stringify_arg (pfile, result);
2982
2983 push_arg_context (pfile, result);
2984 }
041c3194 2985}
c5a04734 2986
041c3194
ZW
2987/* Internal interface to get the token without macro expanding. */
2988const cpp_token *
2989_cpp_get_raw_token (pfile)
2990 cpp_reader *pfile;
2991{
2992 int prev_nme = prevent_macro_expansion (pfile);
417f3e3a 2993 const cpp_token *result = _cpp_get_token (pfile);
041c3194
ZW
2994 restore_macro_expansion (pfile, prev_nme);
2995 return result;
2996}
c5a04734 2997
041c3194
ZW
2998/* A thin wrapper to lex_line. CLEAR is non-zero if the current token
2999 list should be overwritten, or zero if we need to append
3000 (typically, if we are within the arguments to a macro, or looking
3001 for the '(' to start a function-like macro invocation). */
3002static int
3003lex_next (pfile, clear)
3004 cpp_reader *pfile;
3005 int clear;
3006{
3007 cpp_toklist *list = &pfile->token_list;
3008 const cpp_token *old_list = list->tokens;
3009 unsigned int old_used = list->tokens_used;
c5a04734 3010
041c3194 3011 if (clear)
c5a04734 3012 {
041c3194
ZW
3013 /* Release all temporary tokens. */
3014 _cpp_clear_toklist (list);
3015 pfile->contexts[0].posn = 0;
3016 if (pfile->temp_used)
3017 release_temp_tokens (pfile);
c5a04734 3018 }
041c3194
ZW
3019
3020 lex_line (pfile, list);
3021 pfile->contexts[0].count = list->tokens_used;
c5a04734 3022
041c3194 3023 if (!clear && pfile->args)
c5a04734 3024 {
041c3194
ZW
3025 /* Fix up argument token pointers. */
3026 if (old_list != list->tokens)
3027 {
3028 unsigned int i;
3029
3030 for (i = 0; i < pfile->args->used; i++)
3031 {
3032 const cpp_token *token = pfile->args->tokens[i];
3033 if (token >= old_list && token < old_list + old_used)
3034 pfile->args->tokens[i] = (const cpp_token *)
3035 ((char *) token + ((char *) list->tokens - (char *) old_list));
3036 }
3037 }
3038
3039 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3040 tokens within the list of arguments that would otherwise act as
3041 preprocessing directives, the behavior is undefined.
3042
3043 This implementation will report a hard error and treat the
3044 'sequence of preprocessing tokens' as part of the macro argument,
3045 not a directive.
3046
3047 Note if pfile->args == 0, we're OK since we're only inside a
3048 macro argument after a '('. */
3049 if (list->directive)
3050 {
3051 cpp_error_with_line (pfile, list->tokens[old_used].line,
3052 list->tokens[old_used].col,
3053 "#%s may not be used inside a macro argument",
3054 list->directive->name);
91fcd158 3055 return 1;
041c3194 3056 }
c5a04734
ZW
3057 }
3058
041c3194 3059 return 0;
c5a04734
ZW
3060}
3061
417f3e3a
ZW
3062/* Pops a context off the context stack. If we're at the bottom, lexes
3063 the next logical line. Returns EOF if we're at the end of the
041c3194
ZW
3064 argument list to the # operator, or if it is illegal to "overflow"
3065 into the rest of the file (e.g. 6.10.3.1.1). */
3066static int
417f3e3a 3067pop_context (pfile)
041c3194
ZW
3068 cpp_reader *pfile;
3069{
3070 cpp_context *context;
3fef5b2b 3071
041c3194 3072 if (pfile->cur_context == 0)
417f3e3a
ZW
3073 {
3074 /* If we are currently processing a directive, do not advance. 6.10
3075 paragraph 2: A new-line character ends the directive even if it
3076 occurs within what would otherwise be an invocation of a
3077 function-like macro. */
3078 if (pfile->token_list.directive)
3079 return 1;
3080
3081 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3082 }
041c3194
ZW
3083
3084 /* Argument contexts, when parsing args or handling # operator
3085 return CPP_EOF at the end. */
3086 context = CURRENT_CONTEXT (pfile);
3087 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3088 return 1;
3089
3090 /* Free resources when leaving macro contexts. */
3091 if (context->args)
3092 free_macro_args (context->args);
3093
3094 if (pfile->cur_context == pfile->no_expand_level)
3095 pfile->no_expand_level--;
3096 pfile->cur_context--;
3097
3098 return 0;
3099}
3100
041c3194
ZW
3101/* Turn off macro expansion at the current context level. */
3102static unsigned int
3103prevent_macro_expansion (pfile)
3104 cpp_reader *pfile;
3105{
3106 unsigned int prev_value = pfile->no_expand_level;
3107 pfile->no_expand_level = pfile->cur_context;
3108 return prev_value;
3109}
3110
3111/* Restore macro expansion to its previous state. */
3112static void
3113restore_macro_expansion (pfile, prev_value)
3114 cpp_reader *pfile;
3115 unsigned int prev_value;
3116{
3117 pfile->no_expand_level = prev_value;
3118}
3119
3120/* Used by cpperror.c to obtain the correct line and column to report
3121 in a diagnostic. */
3122unsigned int
3123_cpp_get_line (pfile, pcol)
3124 cpp_reader *pfile;
3125 unsigned int *pcol;
3126{
3127 unsigned int index;
3128 const cpp_token *cur_token;
3129
3130 if (pfile->in_lex_line)
3131 index = pfile->token_list.tokens_used;
3132 else
3133 index = pfile->contexts[0].posn;
3134
3135 cur_token = &pfile->token_list.tokens[index - 1];
3136 if (pcol)
3137 *pcol = cur_token->col;
3138 return cur_token->line;
3139}
3140
3141#define DSC(str) (const U_CHAR *)str, sizeof str - 1
3142static const char * const monthnames[] =
3143{
3144 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3145 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3146};
3147
3148/* Handle builtin macros like __FILE__. */
3149static const cpp_token *
3150special_symbol (pfile, node, token)
3151 cpp_reader *pfile;
3152 cpp_hashnode *node;
d1d9a6bd 3153 const cpp_token *token;
3fef5b2b 3154{
041c3194
ZW
3155 cpp_token *result;
3156 cpp_buffer *ip;
3fef5b2b 3157
041c3194 3158 switch (node->type)
3fef5b2b 3159 {
041c3194
ZW
3160 case T_FILE:
3161 case T_BASE_FILE:
3fef5b2b 3162 {
041c3194 3163 const char *file;
3fef5b2b 3164
041c3194
ZW
3165 ip = CPP_BUFFER (pfile);
3166 if (ip == 0)
3167 file = "";
3fef5b2b 3168 else
041c3194
ZW
3169 {
3170 if (node->type == T_BASE_FILE)
3171 while (CPP_PREV_BUFFER (ip) != NULL)
3172 ip = CPP_PREV_BUFFER (ip);
3173
3174 file = ip->nominal_fname;
3175 }
3176 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3177 strlen (file));
3178 }
3179 break;
3fef5b2b 3180
041c3194
ZW
3181 case T_INCLUDE_LEVEL:
3182 {
3183 int true_indepth = 0;
3184
3185 /* Do not count the primary source file in the include level. */
3186 ip = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
3187 while (ip)
3188 {
3189 true_indepth++;
3190 ip = CPP_PREV_BUFFER (ip);
3191 }
3192 result = alloc_number_token (pfile, true_indepth);
3fef5b2b
NB
3193 }
3194 break;
3195
041c3194
ZW
3196 case T_SPECLINE:
3197 /* If __LINE__ is embedded in a macro, it must expand to the
3198 line of the macro's invocation, not its definition.
3199 Otherwise things like assert() will not work properly. */
3200 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
5d7ee2fa
NB
3201 break;
3202
041c3194 3203 case T_STDC:
3fef5b2b 3204 {
041c3194 3205 int stdc = 1;
3fef5b2b 3206
041c3194
ZW
3207#ifdef STDC_0_IN_SYSTEM_HEADERS
3208 if (CPP_IN_SYSTEM_HEADER (pfile)
bfb9dc7f 3209 && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
041c3194
ZW
3210 stdc = 0;
3211#endif
3212 result = alloc_number_token (pfile, stdc);
3fef5b2b
NB
3213 }
3214 break;
3215
041c3194
ZW
3216 case T_DATE:
3217 case T_TIME:
3218 if (pfile->date == 0)
3219 {
3220 /* Allocate __DATE__ and __TIME__ from permanent storage,
3221 and save them in pfile so we don't have to do this again.
3222 We don't generate these strings at init time because
3223 time() and localtime() are very slow on some systems. */
3224 time_t tt = time (NULL);
3225 struct tm *tb = localtime (&tt);
3226
3227 pfile->date = make_string_token
3228 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3229 pfile->time = make_string_token
3230 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3231
bfb9dc7f 3232 sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
041c3194 3233 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
bfb9dc7f 3234 sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
041c3194
ZW
3235 tb->tm_hour, tb->tm_min, tb->tm_sec);
3236 }
3237 result = node->type == T_DATE ? pfile->date: pfile->time;
3fef5b2b
NB
3238 break;
3239
041c3194 3240 case T_POISON:
bfb9dc7f 3241 cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
041c3194
ZW
3242 return token;
3243
3244 default:
3245 cpp_ice (pfile, "invalid special hash type");
3246 return token;
3fef5b2b
NB
3247 }
3248
041c3194
ZW
3249 ASSIGN_FLAGS_AND_POS (result, token);
3250 return result;
3251}
3252#undef DSC
3253
3254/* Dump the original user's spelling of argument index ARG_NO to the
3255 macro whose expansion is LIST. */
3256static void
3257dump_param_spelling (pfile, list, arg_no)
3258 cpp_reader *pfile;
3259 const cpp_toklist *list;
3260 unsigned int arg_no;
3261{
3262 const U_CHAR *param = list->namebuf;
3263
3264 while (arg_no--)
3265 param += ustrlen (param) + 1;
3266 CPP_PUTS (pfile, param, ustrlen (param));
3fef5b2b
NB
3267}
3268
041c3194 3269/* Dump a token list to the output. */
c5a04734 3270void
041c3194
ZW
3271_cpp_dump_list (pfile, list, token, flush)
3272 cpp_reader *pfile;
3273 const cpp_toklist *list;
3274 const cpp_token *token;
3275 int flush;
c5a04734 3276{
041c3194
ZW
3277 const cpp_token *limit = list->tokens + list->tokens_used;
3278 const cpp_token *prev = 0;
c5a04734 3279
041c3194
ZW
3280 /* Avoid the CPP_EOF. */
3281 if (list->directive)
3282 limit--;
c5a04734 3283
041c3194 3284 while (token < limit)
c5a04734 3285 {
041c3194
ZW
3286 if (token->type == CPP_MACRO_ARG)
3287 {
3288 if (token->flags & PREV_WHITE)
3289 CPP_PUTC (pfile, ' ');
3290 if (token->flags & STRINGIFY_ARG)
3291 CPP_PUTC (pfile, '#');
3292 dump_param_spelling (pfile, list, token->val.aux);
3293 }
c5a04734 3294 else
041c3194
ZW
3295 output_token (pfile, token, prev);
3296 if (token->flags & PASTE_LEFT)
3297 CPP_PUTS (pfile, " ##", 3);
3298 prev = token;
3299 token++;
c5a04734 3300 }
041c3194
ZW
3301
3302 if (flush && pfile->printer)
3303 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
c5a04734
ZW
3304}
3305
041c3194
ZW
3306/* Allocate pfile->input_buffer, and initialize trigraph_map[]
3307 if it hasn't happened already. */
3308
3309void
3310_cpp_init_input_buffer (pfile)
3311 cpp_reader *pfile;
3312{
417f3e3a
ZW
3313 cpp_context *base;
3314
041c3194 3315 init_trigraph_map ();
417f3e3a
ZW
3316 _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3317 pfile->no_expand_level = UINT_MAX;
041c3194 3318 pfile->context_cap = 20;
041c3194 3319 pfile->cur_context = 0;
041c3194 3320
417f3e3a
ZW
3321 pfile->contexts = (cpp_context *)
3322 xmalloc (pfile->context_cap * sizeof (cpp_context));
041c3194 3323
417f3e3a
ZW
3324 /* Clear the base context. */
3325 base = &pfile->contexts[0];
3326 base->u.list = &pfile->token_list;
3327 base->posn = 0;
3328 base->count = 0;
3329 base->args = 0;
3330 base->level = 0;
3331 base->flags = 0;
3332 base->pushed_token = 0;
041c3194
ZW
3333}
3334
3335/* Moves to the end of the directive line, popping contexts as
3336 necessary. */
3337void
3338_cpp_skip_rest_of_line (pfile)
3339 cpp_reader *pfile;
3340{
417f3e3a
ZW
3341 /* Discard all stacked contexts. */
3342 int i;
3343 for (i = pfile->cur_context; i > 0; i--)
3344 if (pfile->contexts[i].args)
3345 free_macro_args (pfile->contexts[i].args);
3346
3347 if (pfile->no_expand_level <= pfile->cur_context)
3348 pfile->no_expand_level = 0;
3349 pfile->cur_context = 0;
041c3194 3350
417f3e3a
ZW
3351 /* Clear the base context, and clear the directive pointer so that
3352 get_raw_token will advance to the next line. */
3353 pfile->contexts[0].count = 0;
3354 pfile->contexts[0].posn = 0;
041c3194 3355 pfile->token_list.directive = 0;
c5a04734
ZW
3356}
3357
041c3194
ZW
3358/* Directive handler wrapper used by the command line option
3359 processor. */
3360void
3361_cpp_run_directive (pfile, dir, buf, count)
3362 cpp_reader *pfile;
3363 const struct directive *dir;
3364 const char *buf;
3365 size_t count;
3366{
3367 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3368 {
3369 unsigned int prev_lvl = 0;
417f3e3a
ZW
3370
3371 /* Scan the line now, else prevent_macro_expansion won't work. */
3372 lex_next (pfile, 1);
041c3194
ZW
3373 if (! (dir->flags & EXPAND))
3374 prev_lvl = prevent_macro_expansion (pfile);
3375
3376 (void) (*dir->handler) (pfile);
3377
3378 if (! (dir->flags & EXPAND))
3379 restore_macro_expansion (pfile, prev_lvl);
3380
3381 _cpp_skip_rest_of_line (pfile);
3382 cpp_pop_buffer (pfile);
3383 }
3384}
This page took 0.591529 seconds and 5 git commands to generate.