]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
decl.c (duplicate_decls): Preserve DECL_ORIGINAL_TYPE for a TYPE_DECL.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23#include "config.h"
24#include "system.h"
25#include "intl.h"
26#include "cpplib.h"
27#include "cpphash.h"
28
f8f769ea
ZW
29#ifdef HAVE_MMAP_FILE
30# include <sys/mman.h>
31#endif
32
ff2b53ef
ZW
/* Raw access to the read cursor of a cpp_buffer.

   PEEKBUF yields the character N places past the cursor without
   consuming anything; GETBUF yields the character at the cursor and
   steps over it.  Both yield EOF once the cursor has reached rlimit.
   FORWARDBUF moves the cursor by N (which may be negative) and does
   no bounds checking at all.  */
#define PEEKBUF(BUFFER, N) \
  ((N) < (BUFFER)->rlimit - (BUFFER)->cur ? (BUFFER)->cur[N] : EOF)
#define GETBUF(BUFFER) \
  ((BUFFER)->cur >= (BUFFER)->rlimit ? EOF : *(BUFFER)->cur++)
#define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))

/* The same operations applied to the buffer on top of the input
   stack.  These expect a variable named `pfile' to be in scope.  */
#define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
#define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
#define GETC() GETBUF (CPP_BUFFER (pfile))
#define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
45b966db
ZW
43
44static void skip_block_comment PARAMS ((cpp_reader *));
45static void skip_line_comment PARAMS ((cpp_reader *));
46static int maybe_macroexpand PARAMS ((cpp_reader *, long));
47static int skip_comment PARAMS ((cpp_reader *, int));
48static int copy_comment PARAMS ((cpp_reader *, int));
49static void skip_string PARAMS ((cpp_reader *, int));
50static void parse_string PARAMS ((cpp_reader *, int));
51static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
64aaf407 52static void null_warning PARAMS ((cpp_reader *, unsigned int));
45b966db 53
f2d5f0cc
ZW
54static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
55 size_t, FILE *));
1368ee70
ZW
56static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
57 unsigned int));
58static void bump_column PARAMS ((cpp_printer *, unsigned int,
59 unsigned int));
c5a04734 60static void expand_name_space PARAMS ((cpp_toklist *, unsigned int));
9e62c811
ZW
61static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
62 unsigned int));
f2d5f0cc 63
c5a04734 64#define auto_expand_name_space(list) \
f617b8e2 65 expand_name_space ((list), 1 + (list)->name_cap / 2)
c5a04734 66
b8f41010
NB
#ifdef NEW_LEXER

/* Declarations used only by the new, line-at-a-time lexer.  */
void init_trigraph_map PARAMS ((void));
static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
						unsigned char *));
static const unsigned char *backslash_start PARAMS ((cpp_reader *,
						     const unsigned char *));
static int skip_block_comment2 PARAMS ((cpp_reader *));
static int skip_line_comment2 PARAMS ((cpp_reader *));
static void skip_whitespace PARAMS ((cpp_reader *, int));
static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
static void parse_string2 PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
				   unsigned int, int));
static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
				  const unsigned char *,
				  unsigned int, unsigned int));
void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));

static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));

static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
					    unsigned char *, int));

typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
					  cpp_token *));

/* Macros on a cpp_name.  */

/* Start an empty name for TOKEN at the current end of LIST's name
   buffer, and record TOKEN as the last used token of LIST.  TOKEN is
   parenthesized so that any pointer expression may be passed.  */
#define INIT_TOKEN_NAME(list, token) \
  do {(token)->val.name.len = 0; \
      (token)->val.name.text = (list)->namebuf + (list)->name_used; \
      (list)->tokens_used = (token) - (list)->tokens + 1; \
  } while (0)

/* Maybe put these in the ISTABLE eventually.  */
#define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
#define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')

/* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
   character, if any, is in buffer.  C is the newline character just
   read; '\r' + '\n' - (c) is the other one of the pair, which is
   skipped if it follows immediately.  Expects `pfile' in scope.  */
#define handle_newline(cur, limit, c) \
  do {\
  if ((cur) < (limit) && *(cur) == '\r' + '\n' - (c)) \
    (cur)++; \
  CPP_BUMP_LINE_CUR (pfile, (cur)); \
  pfile->col_adjust = 0; \
  } while (0)

/* The macros below expect a `cur_token' pointer in scope.  */
#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
#define PREV_TOKEN_TYPE (cur_token[-1].type)

/* Each of these expands to a single parenthesized expression, so it
   can be used wherever an expression statement is expected.  */
#define PUSH_TOKEN(ttype) (cur_token++->type = (ttype))
#define REVISE_TOKEN(ttype) (cur_token[-1].type = (ttype))
#define BACKUP_TOKEN(ttype) ((--cur_token)->type = (ttype))
#define BACKUP_DIGRAPH(ttype) do { \
  BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)

/* An upper bound on the number of bytes needed to spell a token,
   including preceding whitespace.  */
#define TOKEN_LEN(token) (5 + (token_spellings[(token)->type].type > \
			       SPELL_NONE ? (token)->val.name.len: 0))

#endif
131
5d7ee2fa
NB
132/* Order here matters. Those beyond SPELL_NONE store their spelling
133 in the token list, and it's length in the token->val.name.len. */
d1d9a6bd
NB
134enum spell_type
135{
136 SPELL_OPERATOR = 0,
137 SPELL_NONE,
138 SPELL_CHAR, /* FIXME: revert order of NONE and CHAR after transition. */
139 SPELL_IDENT,
140 SPELL_STRING
141};
5d7ee2fa 142
f617b8e2 143#define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
5d7ee2fa
NB
144#define I(e, s) {SPELL_IDENT, s},
145#define S(e, s) {SPELL_STRING, s},
b8f41010
NB
146#define C(e, s) {SPELL_CHAR, s},
147#define N(e, s) {SPELL_NONE, s},
b8f41010
NB
148
149static const struct token_spelling
150{
d1d9a6bd 151 ENUM_BITFIELD(spell_type) type : CHAR_BIT;
f617b8e2 152 const U_CHAR *spelling;
b8f41010
NB
153} token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
154
155#undef T
5d7ee2fa
NB
156#undef I
157#undef S
b8f41010
NB
158#undef C
159#undef N
b8f41010 160
45b966db
ZW
161/* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
162
163void
164_cpp_grow_token_buffer (pfile, n)
165 cpp_reader *pfile;
166 long n;
167{
168 long old_written = CPP_WRITTEN (pfile);
169 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
170 pfile->token_buffer = (U_CHAR *)
171 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
172 CPP_SET_WRITTEN (pfile, old_written);
173}
174
45b966db
ZW
175/* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
176 If BUFFER != NULL, then use the LENGTH characters in BUFFER
177 as the new input buffer.
178 Return the new buffer, or NULL on failure. */
179
180cpp_buffer *
181cpp_push_buffer (pfile, buffer, length)
182 cpp_reader *pfile;
183 const U_CHAR *buffer;
184 long length;
185{
186 cpp_buffer *buf = CPP_BUFFER (pfile);
187 cpp_buffer *new;
188 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
189 {
190 cpp_fatal (pfile, "macro or `#include' recursion too deep");
191 return NULL;
192 }
193
194 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
195
45b966db 196 new->buf = new->cur = buffer;
ff2b53ef 197 new->rlimit = buffer + length;
45b966db 198 new->prev = buf;
ff2b53ef 199 new->mark = NULL;
45b966db
ZW
200 new->line_base = NULL;
201
202 CPP_BUFFER (pfile) = new;
203 return new;
204}
205
206cpp_buffer *
207cpp_pop_buffer (pfile)
208 cpp_reader *pfile;
209{
210 cpp_buffer *buf = CPP_BUFFER (pfile);
211 if (ACTIVE_MARK_P (pfile))
212 cpp_ice (pfile, "mark active in cpp_pop_buffer");
c56c2073 213
c31a6508 214 if (buf->inc)
c56c2073
ZW
215 {
216 _cpp_unwind_if_stack (pfile, buf);
217 if (buf->buf)
218 free ((PTR) buf->buf);
219 if (pfile->system_include_depth)
220 pfile->system_include_depth--;
221 if (pfile->potential_control_macro)
222 {
c31a6508
ZW
223 if (buf->inc->cmacro != NEVER_REREAD)
224 buf->inc->cmacro = pfile->potential_control_macro;
c56c2073
ZW
225 pfile->potential_control_macro = 0;
226 }
227 pfile->input_stack_listing_current = 0;
c31a6508
ZW
228 /* If the file will not be included again, then close it. */
229 if (DO_NOT_REREAD (buf->inc))
230 {
231 close (buf->inc->fd);
232 buf->inc->fd = -1;
233 }
c56c2073
ZW
234 }
235 else if (buf->macro)
236 {
f8f769ea 237 cpp_hashnode *m = buf->macro;
c56c2073
ZW
238
239 m->disabled = 0;
240 if ((m->type == T_FMACRO && buf->mapped)
241 || m->type == T_SPECLINE || m->type == T_FILE
242 || m->type == T_BASE_FILE || m->type == T_INCLUDE_LEVEL
243 || m->type == T_STDC)
244 free ((PTR) buf->buf);
245 }
45b966db
ZW
246 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
247 free (buf);
248 pfile->buffer_stack_depth--;
249 return CPP_BUFFER (pfile);
250}
251
f2d5f0cc
ZW
252/* Deal with the annoying semantics of fwrite. */
253static void
254safe_fwrite (pfile, buf, len, fp)
255 cpp_reader *pfile;
256 const U_CHAR *buf;
257 size_t len;
258 FILE *fp;
259{
260 size_t count;
45b966db 261
f2d5f0cc
ZW
262 while (len)
263 {
264 count = fwrite (buf, 1, len, fp);
265 if (count == 0)
266 goto error;
267 len -= count;
268 buf += count;
269 }
270 return;
271
272 error:
273 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
274}
275
276/* Notify the compiler proper that the current line number has jumped,
277 or the current file name has changed. */
278
279static void
1368ee70 280output_line_command (pfile, print, line)
45b966db 281 cpp_reader *pfile;
f2d5f0cc 282 cpp_printer *print;
1368ee70 283 unsigned int line;
45b966db 284{
1368ee70 285 cpp_buffer *ip = cpp_file_buffer (pfile);
f2d5f0cc
ZW
286 enum { same = 0, enter, leave, rname } change;
287 static const char * const codes[] = { "", " 1", " 2", "" };
288
289 if (CPP_OPTION (pfile, no_line_commands))
290 return;
291
0e500c78
JJ
292 /* Determine whether the current filename has changed, and if so,
293 how. 'nominal_fname' values are unique, so they can be compared
294 by comparing pointers. */
295 if (ip->nominal_fname == print->last_fname)
296 change = same;
54bef41d
JJ
297 else
298 {
0e500c78
JJ
299 if (pfile->buffer_stack_depth == print->last_bsd)
300 change = rname;
f2d5f0cc 301 else
0e500c78
JJ
302 {
303 if (pfile->buffer_stack_depth > print->last_bsd)
304 change = enter;
305 else
306 change = leave;
307 print->last_bsd = pfile->buffer_stack_depth;
308 }
309 print->last_fname = ip->nominal_fname;
45b966db 310 }
f2d5f0cc
ZW
311 /* If the current file has not changed, we can output a few newlines
312 instead if we want to increase the line number by a small amount.
313 We cannot do this if print->lineno is zero, because that means we
314 haven't output any line commands yet. (The very first line
315 command output is a `same_file' command.) */
316 if (change == same && print->lineno != 0
317 && line >= print->lineno && line < print->lineno + 8)
45b966db 318 {
f2d5f0cc 319 while (line > print->lineno)
45b966db 320 {
f2d5f0cc
ZW
321 putc ('\n', print->outf);
322 print->lineno++;
45b966db 323 }
f2d5f0cc 324 return;
45b966db 325 }
f2d5f0cc
ZW
326
327#ifndef NO_IMPLICIT_EXTERN_C
328 if (CPP_OPTION (pfile, cplusplus))
329 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
330 codes[change],
c31a6508
ZW
331 ip->inc->sysp ? " 3" : "",
332 (ip->inc->sysp == 2) ? " 4" : "");
f2d5f0cc
ZW
333 else
334#endif
335 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
336 codes[change],
c31a6508 337 ip->inc->sysp ? " 3" : "");
f2d5f0cc
ZW
338 print->lineno = line;
339}
340
341/* Write the contents of the token_buffer to the output stream, and
342 clear the token_buffer. Also handles generating line commands and
343 keeping track of file transitions. */
344
345void
346cpp_output_tokens (pfile, print)
347 cpp_reader *pfile;
348 cpp_printer *print;
349{
1368ee70
ZW
350 cpp_buffer *ip;
351
f6fab919
ZW
352 if (CPP_WRITTEN (pfile) - print->written)
353 {
354 if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
355 print->lineno++;
356 safe_fwrite (pfile, pfile->token_buffer,
357 CPP_WRITTEN (pfile) - print->written, print->outf);
358 }
1368ee70
ZW
359
360 ip = cpp_file_buffer (pfile);
361 if (ip)
362 output_line_command (pfile, print, CPP_BUF_LINE (ip));
363
f2d5f0cc 364 CPP_SET_WRITTEN (pfile, print->written);
45b966db
ZW
365}
366
1368ee70
ZW
367/* Helper for cpp_output_list - increases the column number to match
368 what we expect it to be. */
369
370static void
371bump_column (print, from, to)
372 cpp_printer *print;
373 unsigned int from, to;
374{
375 unsigned int tabs, spcs;
376 unsigned int delta = to - from;
377
378 /* Only if FROM is 0, advance by tabs. */
379 if (from == 0)
380 tabs = delta / 8, spcs = delta % 8;
381 else
382 tabs = 0, spcs = delta;
383
384 while (tabs--) putc ('\t', print->outf);
385 while (spcs--) putc (' ', print->outf);
386}
387
388/* Write out the list L onto pfile->token_buffer. This function is
389 incomplete:
390
391 1) pfile->token_buffer is not going to continue to exist.
392 2) At the moment, tokens don't carry the information described
393 in cpplib.h; they are all strings.
394 3) The list has to be a complete line, and has to be written starting
395 at the beginning of a line. */
396
397void
398cpp_output_list (pfile, print, list)
399 cpp_reader *pfile;
400 cpp_printer *print;
401 const cpp_toklist *list;
402{
403 unsigned int i;
404 unsigned int curcol = 1;
405
406 /* XXX Probably does not do what is intended. */
407 if (print->lineno != list->line)
408 output_line_command (pfile, print, list->line);
409
410 for (i = 0; i < list->tokens_used; i++)
411 {
1920de47 412 if (TOK_TYPE (list, i) == CPP_VSPACE)
1368ee70
ZW
413 {
414 output_line_command (pfile, print, list->tokens[i].aux);
415 continue;
416 }
417
1920de47 418 if (curcol < TOK_COL (list, i))
1368ee70
ZW
419 {
420 /* Insert space to bring the column to what it should be. */
1920de47
ZW
421 bump_column (print, curcol - 1, TOK_COL (list, i));
422 curcol = TOK_COL (list, i);
1368ee70
ZW
423 }
424 /* XXX We may have to insert space to prevent an accidental
425 token paste. */
1920de47
ZW
426 safe_fwrite (pfile, TOK_NAME (list, i), TOK_LEN (list, i), print->outf);
427 curcol += TOK_LEN (list, i);
1368ee70
ZW
428 }
429}
430
f2d5f0cc
ZW
431/* Scan a string (which may have escape marks), perform macro expansion,
432 and write the result to the token_buffer. */
45b966db
ZW
433
434void
f2d5f0cc 435_cpp_expand_to_buffer (pfile, buf, length)
45b966db
ZW
436 cpp_reader *pfile;
437 const U_CHAR *buf;
438 int length;
439{
c56c2073 440 cpp_buffer *stop;
f2d5f0cc 441 enum cpp_ttype token;
f6fab919 442 U_CHAR *buf1;
45b966db
ZW
443
444 if (length < 0)
445 {
446 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
447 return;
448 }
449
f6fab919
ZW
450 /* Copy the buffer, because it might be in an unsafe place - for
451 example, a sequence on the token_buffer, where the pointers will
452 be invalidated if we enlarge the token_buffer. */
453 buf1 = alloca (length);
454 memcpy (buf1, buf, length);
455
45b966db 456 /* Set up the input on the input stack. */
c56c2073
ZW
457 stop = CPP_BUFFER (pfile);
458 if (cpp_push_buffer (pfile, buf1, length) == NULL)
45b966db 459 return;
c56c2073 460 CPP_BUFFER (pfile)->has_escapes = 1;
45b966db
ZW
461
462 /* Scan the input, create the output. */
f2d5f0cc
ZW
463 for (;;)
464 {
465 token = cpp_get_token (pfile);
c56c2073 466 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
f2d5f0cc 467 break;
f2d5f0cc 468 }
45b966db
ZW
469}
470
c56c2073 471/* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
f2d5f0cc
ZW
472
473void
474cpp_scan_buffer_nooutput (pfile)
475 cpp_reader *pfile;
476{
c56c2073 477 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
f2d5f0cc
ZW
478 enum cpp_ttype token;
479 unsigned int old_written = CPP_WRITTEN (pfile);
480 /* In no-output mode, we can ignore everything but directives. */
481 for (;;)
482 {
483 if (! pfile->only_seen_white)
484 _cpp_skip_rest_of_line (pfile);
485 token = cpp_get_token (pfile);
c56c2073 486 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
f2d5f0cc 487 break;
f2d5f0cc
ZW
488 }
489 CPP_SET_WRITTEN (pfile, old_written);
490}
491
c56c2073 492/* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
f2d5f0cc
ZW
493
494void
495cpp_scan_buffer (pfile, print)
496 cpp_reader *pfile;
497 cpp_printer *print;
498{
c56c2073 499 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
f2d5f0cc
ZW
500 enum cpp_ttype token;
501
502 for (;;)
503 {
504 token = cpp_get_token (pfile);
15dad1d9 505 if (token == CPP_VSPACE || token == CPP_EOF
f2d5f0cc
ZW
506 /* XXX Temporary kluge - force flush after #include only */
507 || (token == CPP_DIRECTIVE
508 && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
509 {
510 cpp_output_tokens (pfile, print);
c56c2073 511 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
f2d5f0cc 512 return;
f2d5f0cc
ZW
513 }
514 }
515}
516
45b966db
ZW
517/* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
518
519cpp_buffer *
520cpp_file_buffer (pfile)
521 cpp_reader *pfile;
522{
523 cpp_buffer *ip;
524
525 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
c31a6508 526 if (ip->inc != NULL)
45b966db
ZW
527 return ip;
528 return NULL;
529}
530
1368ee70
ZW
531/* Token-buffer helper functions. */
532
d1d9a6bd
NB
533/* Expand a token list's string space. It is *vital* that
534 list->tokens_used is correct, to get pointer fix-up right. */
1368ee70 535static void
c5a04734 536expand_name_space (list, len)
1368ee70 537 cpp_toklist *list;
c5a04734
ZW
538 unsigned int len;
539{
f617b8e2 540 const U_CHAR *old_namebuf;
f617b8e2
NB
541
542 old_namebuf = list->namebuf;
c5a04734
ZW
543 list->name_cap += len;
544 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
f617b8e2
NB
545
546 /* Fix up token text pointers. */
79f50f2a 547 if (list->namebuf != old_namebuf)
f617b8e2
NB
548 {
549 unsigned int i;
550
551 for (i = 0; i < list->tokens_used; i++)
552 if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
79f50f2a 553 list->tokens[i].val.name.text += (list->namebuf - old_namebuf);
f617b8e2 554 }
1368ee70
ZW
555}
556
557/* Expand the number of tokens in a list. */
d1d9a6bd
NB
558void
559_cpp_expand_token_space (list, count)
1368ee70 560 cpp_toklist *list;
d1d9a6bd 561 unsigned int count;
1368ee70 562{
d1d9a6bd
NB
563 unsigned int n;
564
565 list->tokens_cap += count;
566 n = list->tokens_cap;
15dad1d9 567 if (list->flags & LIST_OFFSET)
d1d9a6bd 568 list->tokens--, n++;
1368ee70 569 list->tokens = (cpp_token *)
d1d9a6bd 570 xrealloc (list->tokens, n * sizeof (cpp_token));
15dad1d9
ZW
571 if (list->flags & LIST_OFFSET)
572 list->tokens++; /* Skip the dummy. */
1368ee70
ZW
573}
574
d1d9a6bd
NB
575/* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
576 an extra token in front of the token list, as this allows the lexer
577 to always peek at the previous token without worrying about
578 underflowing the list, and some initial space. Otherwise, no
579 token- or name-space is allocated, and there is no dummy token. */
15dad1d9 580void
d1d9a6bd 581_cpp_init_toklist (list, flags)
1368ee70 582 cpp_toklist *list;
d1d9a6bd 583 int flags;
1368ee70 584{
d1d9a6bd
NB
585 /* We malloc zero bytes because we may want to realloc later, and
586 some old implementations don't like realloc-ing a null pointer. */
587 if (flags == NO_DUMMY_TOKEN)
588 {
589 list->tokens_cap = 0;
590 list->tokens = (cpp_token *) malloc (0);
591 list->name_cap = 0;
592 list->flags = 0;
593 }
594 else
595 {
596 /* Initialize token space. Put a dummy token before the start
597 that will fail matches. */
598 list->tokens_cap = 256; /* 4K's worth. */
599 list->tokens = (cpp_token *)
600 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
601 list->tokens[0].type = CPP_EOF;
602 list->tokens++;
603
604 /* Initialize name space. */
605 list->name_cap = 1024;
606 list->flags = LIST_OFFSET;
607 }
15dad1d9 608
d1d9a6bd 609 /* Allocate name space. */
15dad1d9
ZW
610 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
611
15dad1d9
ZW
612 _cpp_clear_toklist (list);
613}
1368ee70 614
15dad1d9
ZW
615/* Clear a token list. */
616void
617_cpp_clear_toklist (list)
618 cpp_toklist *list;
619{
c5a04734
ZW
620 list->tokens_used = 0;
621 list->name_used = 0;
15dad1d9
ZW
622 list->dirno = -1;
623 list->flags &= LIST_OFFSET; /* clear all but that one */
624}
625
626/* Free a token list. Does not free the list itself, which may be
627 embedded in a larger structure. */
628void
629_cpp_free_toklist (list)
630 cpp_toklist *list;
631{
15dad1d9
ZW
632 if (list->flags & LIST_OFFSET)
633 free (list->tokens - 1); /* Backup over dummy token. */
634 else
635 free (list->tokens);
636 free (list->namebuf);
1368ee70
ZW
637}
638
15dad1d9
ZW
639/* Slice a token list: copy the sublist [START, FINISH) into COPY.
640 COPY is assumed not to be initialized. The comment space is not
641 copied. */
642void
643_cpp_slice_toklist (copy, start, finish)
644 cpp_toklist *copy;
645 const cpp_token *start, *finish;
646{
647 unsigned int i, n;
648 size_t bytes;
649
650 n = finish - start;
651 copy->tokens_cap = n;
652 copy->tokens = (cpp_token *) xmalloc (n * sizeof (cpp_token));
653 memcpy (copy->tokens, start, n * sizeof (cpp_token));
654
655 bytes = 0;
656 for (i = 0; i < n; i++)
657 if (token_spellings[start[i].type].type > SPELL_NONE)
658 bytes += start[i].val.name.len;
659
660 copy->namebuf = xmalloc (bytes);
661 bytes = 0;
662 for (i = 0; i < n; i++)
663 if (token_spellings[start[i].type].type > SPELL_NONE)
664 {
665 memcpy (copy->namebuf + bytes,
666 start[i].val.name.text, start[i].val.name.len);
667 copy->tokens[i].val.name.text = copy->namebuf + bytes;
668 bytes += start[i].val.name.len;
669 }
670
671 copy->tokens_cap = n;
672 copy->tokens_used = n;
673 copy->name_used = bytes;
674 copy->name_cap = bytes;
15dad1d9
ZW
675
676 copy->flags = 0;
677 copy->dirno = -1;
678}
1368ee70 679
15dad1d9 680/* Shrink a token list down to the minimum size. */
1368ee70 681void
15dad1d9
ZW
682_cpp_squeeze_toklist (list)
683 cpp_toklist *list;
684{
685 long delta;
686 const U_CHAR *old_namebuf;
687
688 if (list->flags & LIST_OFFSET)
689 {
690 list->tokens--;
691 memmove (list->tokens, list->tokens + 1,
692 list->tokens_used * sizeof (cpp_token));
693 list->tokens = xrealloc (list->tokens,
694 list->tokens_used * sizeof (cpp_token));
695 list->flags &= ~LIST_OFFSET;
696 }
697 else
698 list->tokens = xrealloc (list->tokens,
699 list->tokens_used * sizeof (cpp_token));
700 list->tokens_cap = list->tokens_used;
701
702 old_namebuf = list->namebuf;
703 list->namebuf = xrealloc (list->namebuf, list->name_used);
704 list->name_cap = list->name_used;
705
706 /* Fix up token text pointers. */
707 delta = list->namebuf - old_namebuf;
708 if (delta)
709 {
710 unsigned int i;
711
712 for (i = 0; i < list->tokens_used; i++)
713 if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
714 list->tokens[i].val.name.text += delta;
715 }
15dad1d9
ZW
716}
717
718/* Compare two tokens. */
719int
720_cpp_equiv_tokens (a, b)
721 const cpp_token *a, *b;
722{
723 if (a->type != b->type
724 || a->flags != b->flags
725 || a->aux != b->aux)
726 return 0;
727
728 if (token_spellings[a->type].type > SPELL_NONE)
729 {
730 if (a->val.name.len != b->val.name.len
731 || ustrncmp(a->val.name.text,
732 b->val.name.text,
733 a->val.name.len))
734 return 0;
735 }
736 return 1;
737}
738
739/* Compare two token lists. */
740int
741_cpp_equiv_toklists (a, b)
742 const cpp_toklist *a, *b;
743{
744 unsigned int i;
745
746 if (a->tokens_used != b->tokens_used)
747 return 0;
748
749 for (i = 0; i < a->tokens_used; i++)
750 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
751 return 0;
752 return 1;
753}
754
755/* Scan until we encounter a token of type STOP or a newline, and
756 create a token list for it. Does not macro-expand or execute
757 directives. The final token is not included in the list or
758 consumed from the input. Returns the type of the token stopped at. */
759
760enum cpp_ttype
761_cpp_scan_until (pfile, list, stop)
1368ee70
ZW
762 cpp_reader *pfile;
763 cpp_toklist *list;
15dad1d9 764 enum cpp_ttype stop;
1368ee70
ZW
765{
766 int i, col;
767 long written, len;
768 enum cpp_ttype type;
9e62c811 769 int space_before;
1368ee70 770
15dad1d9
ZW
771 _cpp_clear_toklist (list);
772 list->line = CPP_BUF_LINE (CPP_BUFFER (pfile));
1368ee70
ZW
773
774 written = CPP_WRITTEN (pfile);
775 i = 0;
9e62c811 776 space_before = 0;
1368ee70
ZW
777 for (;;)
778 {
779 col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
780 type = _cpp_lex_token (pfile);
781 len = CPP_WRITTEN (pfile) - written;
782 CPP_SET_WRITTEN (pfile, written);
783 if (type == CPP_HSPACE)
9e62c811
ZW
784 {
785 if (CPP_PEDANTIC (pfile))
786 pedantic_whitespace (pfile, pfile->token_buffer + written, len);
787 space_before = 1;
788 continue;
789 }
0f89df67
ZW
790 else if (type == CPP_COMMENT)
791 /* Only happens when processing -traditional macro definitions.
792 Do not give this a token entry, but do not change space_before
793 either. */
794 continue;
1368ee70
ZW
795
796 if (list->tokens_used >= list->tokens_cap)
d1d9a6bd 797 _cpp_expand_token_space (list, 256);
1368ee70 798 if (list->name_used + len >= list->name_cap)
bb1ec1d7 799 expand_name_space (list, list->name_used + len + 1 - list->name_cap);
1368ee70 800
9e62c811
ZW
801 if (type == CPP_MACRO)
802 type = CPP_NAME;
803
15dad1d9
ZW
804 if (type == CPP_VSPACE || type == stop)
805 break;
806
1368ee70 807 list->tokens_used++;
1920de47
ZW
808 TOK_TYPE (list, i) = type;
809 TOK_COL (list, i) = col;
a58f64f5 810 TOK_AUX (list, i) = 0;
1920de47 811 TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;
9e62c811 812
1920de47 813 TOK_LEN (list, i) = len;
f617b8e2
NB
814 if (token_spellings[type].type > SPELL_NONE)
815 {
816 memcpy (list->namebuf + list->name_used, CPP_PWRITTEN (pfile), len);
817 TOK_NAME (list, i) = list->namebuf + list->name_used;
818 list->name_used += len;
819 }
820 else
821 TOK_NAME (list, i) = token_spellings[type].spelling;
1368ee70 822 i++;
9e62c811 823 space_before = 0;
1368ee70 824 }
9e62c811 825
15dad1d9 826 /* XXX Temporary kluge: put back the newline (or whatever). */
9e62c811 827 FORWARD(-1);
1368ee70 828
15dad1d9
ZW
829 /* Don't consider the first token to have white before. */
830 TOK_FLAGS (list, 0) &= ~PREV_WHITESPACE;
831 return type;
832}
1368ee70 833
45b966db
ZW
834/* Skip a C-style block comment. We know it's a comment, and point is
835 at the second character of the starter. */
836static void
837skip_block_comment (pfile)
838 cpp_reader *pfile;
839{
3a2b2c7a 840 unsigned int line, col;
61474454 841 const U_CHAR *limit, *cur;
45b966db
ZW
842
843 FORWARD(1);
3a2b2c7a
ZW
844 line = CPP_BUF_LINE (CPP_BUFFER (pfile));
845 col = CPP_BUF_COL (CPP_BUFFER (pfile));
61474454
NB
846 limit = CPP_BUFFER (pfile)->rlimit;
847 cur = CPP_BUFFER (pfile)->cur;
848
849 while (cur < limit)
45b966db 850 {
61474454
NB
851 char c = *cur++;
852 if (c == '\n' || c == '\r')
45b966db
ZW
853 {
854 /* \r cannot be a macro escape marker here. */
855 if (!ACTIVE_MARK_P (pfile))
61474454
NB
856 CPP_BUMP_LINE_CUR (pfile, cur);
857 }
858 else if (c == '*')
859 {
860 /* Check for teminator. */
861 if (cur < limit && *cur == '/')
862 goto out;
863
864 /* Warn about comment starter embedded in comment. */
865 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
866 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
867 cur - CPP_BUFFER (pfile)->line_base,
868 "'/*' within comment");
45b966db 869 }
45b966db 870 }
61474454
NB
871
872 cpp_error_with_line (pfile, line, col, "unterminated comment");
873 cur--;
874 out:
875 CPP_BUFFER (pfile)->cur = cur + 1;
45b966db
ZW
876}
877
878/* Skip a C++/Chill line comment. We know it's a comment, and point
879 is at the second character of the initiator. */
880static void
881skip_line_comment (pfile)
882 cpp_reader *pfile;
883{
884 FORWARD(1);
885 for (;;)
886 {
887 int c = GETC ();
888
889 /* We don't have to worry about EOF in here. */
890 if (c == '\n')
891 {
892 /* Don't consider final '\n' to be part of comment. */
893 FORWARD(-1);
894 return;
895 }
896 else if (c == '\r')
897 {
898 /* \r cannot be a macro escape marker here. */
899 if (!ACTIVE_MARK_P (pfile))
900 CPP_BUMP_LINE (pfile);
ae79697b 901 if (CPP_OPTION (pfile, warn_comments))
45b966db
ZW
902 cpp_warning (pfile, "backslash-newline within line comment");
903 }
904 }
905}
906
907/* Skip a comment - C, C++, or Chill style. M is the first character
908 of the comment marker. If this really is a comment, skip to its
909 end and return ' '. If this is not a comment, return M (which will
910 be '/' or '-'). */
911
912static int
913skip_comment (pfile, m)
914 cpp_reader *pfile;
915 int m;
916{
917 if (m == '/' && PEEKC() == '*')
918 {
919 skip_block_comment (pfile);
920 return ' ';
921 }
922 else if (m == '/' && PEEKC() == '/')
923 {
c31a6508 924 if (CPP_IN_SYSTEM_HEADER (pfile))
45b966db
ZW
925 {
926 /* We silently allow C++ comments in system headers, irrespective
927 of conformance mode, because lots of busted systems do that
928 and trying to clean it up in fixincludes is a nightmare. */
929 skip_line_comment (pfile);
930 return ' ';
931 }
ae79697b 932 else if (CPP_OPTION (pfile, cplusplus_comments))
45b966db 933 {
0f89df67 934 if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
45b966db 935 {
0f89df67
ZW
936 if (CPP_WTRADITIONAL (pfile))
937 cpp_pedwarn (pfile,
938 "C++ style comments are not allowed in traditional C");
939 else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
940 cpp_pedwarn (pfile,
941 "C++ style comments are not allowed in ISO C89");
942 if (CPP_WTRADITIONAL (pfile)
943 || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
944 cpp_pedwarn (pfile,
45b966db
ZW
945 "(this will be reported only once per input file)");
946 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
947 }
948 skip_line_comment (pfile);
949 return ' ';
950 }
951 else
952 return m;
953 }
954 else if (m == '-' && PEEKC() == '-'
ae79697b 955 && CPP_OPTION (pfile, chill))
45b966db
ZW
956 {
957 skip_line_comment (pfile);
958 return ' ';
959 }
960 else
961 return m;
962}
963
964/* Identical to skip_comment except that it copies the comment into the
965 token_buffer. This is used if !discard_comments. */
966static int
967copy_comment (pfile, m)
968 cpp_reader *pfile;
969 int m;
970{
971 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
972 const U_CHAR *limit;
973
974 if (skip_comment (pfile, m) == m)
975 return m;
976
977 limit = CPP_BUFFER (pfile)->cur;
978 CPP_RESERVE (pfile, limit - start + 2);
979 CPP_PUTC_Q (pfile, m);
980 for (; start <= limit; start++)
981 if (*start != '\r')
982 CPP_PUTC_Q (pfile, *start);
983
984 return ' ';
985}
986
64aaf407
NB
987static void
988null_warning (pfile, count)
989 cpp_reader *pfile;
990 unsigned int count;
991{
992 if (count == 1)
993 cpp_warning (pfile, "embedded null character ignored");
994 else
995 cpp_warning (pfile, "embedded null characters ignored");
996}
997
45b966db
ZW
998/* Skip whitespace \-newline and comments. Does not macro-expand. */
999
1000void
1001_cpp_skip_hspace (pfile)
1002 cpp_reader *pfile;
1003{
64aaf407 1004 unsigned int null_count = 0;
45b966db 1005 int c;
64aaf407 1006
45b966db
ZW
1007 while (1)
1008 {
1009 c = GETC();
1010 if (c == EOF)
64aaf407 1011 goto out;
45b966db
ZW
1012 else if (is_hspace(c))
1013 {
1014 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
1015 cpp_pedwarn (pfile, "%s in preprocessing directive",
1016 c == '\f' ? "formfeed" : "vertical tab");
64aaf407
NB
1017 else if (c == '\0')
1018 null_count++;
45b966db
ZW
1019 }
1020 else if (c == '\r')
1021 {
1022 /* \r is a backslash-newline marker if !has_escapes, and
1023 a deletable-whitespace or no-reexpansion marker otherwise. */
1024 if (CPP_BUFFER (pfile)->has_escapes)
1025 {
1026 if (PEEKC() == ' ')
1027 FORWARD(1);
1028 else
1029 break;
1030 }
1031 else
1032 CPP_BUMP_LINE (pfile);
1033 }
1034 else if (c == '/' || c == '-')
1035 {
1036 c = skip_comment (pfile, c);
1037 if (c != ' ')
1038 break;
1039 }
1040 else
1041 break;
1042 }
1043 FORWARD(-1);
64aaf407
NB
1044 out:
1045 if (null_count)
1046 null_warning (pfile, null_count);
45b966db
ZW
1047}
1048
1049/* Read and discard the rest of the current line. */
1050
1051void
1052_cpp_skip_rest_of_line (pfile)
1053 cpp_reader *pfile;
1054{
1055 for (;;)
1056 {
1057 int c = GETC();
1058 switch (c)
1059 {
1060 case '\n':
1061 FORWARD(-1);
1062 case EOF:
1063 return;
1064
1065 case '\r':
1066 if (! CPP_BUFFER (pfile)->has_escapes)
1067 CPP_BUMP_LINE (pfile);
1068 break;
1069
1070 case '\'':
1071 case '\"':
1072 skip_string (pfile, c);
1073 break;
1074
1075 case '/':
1076 case '-':
1077 skip_comment (pfile, c);
1078 break;
1079
1080 case '\f':
1081 case '\v':
1082 if (CPP_PEDANTIC (pfile))
1083 cpp_pedwarn (pfile, "%s in preprocessing directive",
1084 c == '\f' ? "formfeed" : "vertical tab");
1085 break;
1086
1087 }
1088 }
1089}
1090
1091/* Parse an identifier starting with C. */
1092
1093void
1094_cpp_parse_name (pfile, c)
1095 cpp_reader *pfile;
1096 int c;
1097{
1098 for (;;)
1099 {
1100 if (! is_idchar(c))
1101 {
1102 FORWARD (-1);
1103 break;
1104 }
1105
e5ec2402
ZW
1106 /* $ is not a legal identifier character in the standard, but is
1107 commonly accepted as an extension. Don't warn about it in
1108 skipped conditional blocks. */
1109 if (c == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
45b966db
ZW
1110 cpp_pedwarn (pfile, "`$' in identifier");
1111
1112 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
1113 CPP_PUTC_Q (pfile, c);
1114 c = GETC();
1115 if (c == EOF)
1116 break;
1117 }
45b966db
ZW
1118 return;
1119}
1120
1121/* Parse and skip over a string starting with C. A single quoted
1122 string is treated like a double -- some programs (e.g., troff) are
1123 perverse this way. (However, a single quoted string is not allowed
1124 to extend over multiple lines.) */
1125static void
1126skip_string (pfile, c)
1127 cpp_reader *pfile;
1128 int c;
1129{
3a2b2c7a 1130 unsigned int start_line, start_column;
64aaf407 1131 unsigned int null_count = 0;
45b966db 1132
3a2b2c7a
ZW
1133 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
1134 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
45b966db
ZW
1135 while (1)
1136 {
1137 int cc = GETC();
1138 switch (cc)
1139 {
1140 case EOF:
1141 cpp_error_with_line (pfile, start_line, start_column,
1142 "unterminated string or character constant");
1143 if (pfile->multiline_string_line != start_line
1144 && pfile->multiline_string_line != 0)
1145 cpp_error_with_line (pfile,
1146 pfile->multiline_string_line, -1,
1147 "possible real start of unterminated constant");
1148 pfile->multiline_string_line = 0;
64aaf407 1149 goto out;
45b966db 1150
64aaf407
NB
1151 case '\0':
1152 null_count++;
1153 break;
1154
45b966db
ZW
1155 case '\n':
1156 CPP_BUMP_LINE (pfile);
1157 /* In Fortran and assembly language, silently terminate
1158 strings of either variety at end of line. This is a
1159 kludge around not knowing where comments are in these
1160 languages. */
ae79697b
ZW
1161 if (CPP_OPTION (pfile, lang_fortran)
1162 || CPP_OPTION (pfile, lang_asm))
45b966db
ZW
1163 {
1164 FORWARD(-1);
64aaf407 1165 goto out;
45b966db
ZW
1166 }
1167 /* Character constants may not extend over multiple lines.
1168 In Standard C, neither may strings. We accept multiline
1169 strings as an extension. */
1170 if (c == '\'')
1171 {
1172 cpp_error_with_line (pfile, start_line, start_column,
1173 "unterminated character constant");
1174 FORWARD(-1);
64aaf407 1175 goto out;
45b966db
ZW
1176 }
1177 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
1178 cpp_pedwarn_with_line (pfile, start_line, start_column,
1179 "string constant runs past end of line");
1180 if (pfile->multiline_string_line == 0)
1181 pfile->multiline_string_line = start_line;
1182 break;
1183
1184 case '\r':
1185 if (CPP_BUFFER (pfile)->has_escapes)
1186 {
1187 cpp_ice (pfile, "\\r escape inside string constant");
1188 FORWARD(1);
1189 }
1190 else
1191 /* Backslash newline is replaced by nothing at all. */
1192 CPP_BUMP_LINE (pfile);
1193 break;
1194
1195 case '\\':
1196 FORWARD(1);
1197 break;
1198
1199 case '\"':
1200 case '\'':
1201 if (cc == c)
64aaf407 1202 goto out;
45b966db
ZW
1203 break;
1204 }
1205 }
64aaf407
NB
1206
1207 out:
1208 if (null_count == 1)
1209 cpp_warning (pfile, "null character in string or character constant");
1210 else if (null_count > 1)
1211 cpp_warning (pfile, "null characters in string or character constant");
45b966db
ZW
1212}
1213
1214/* Parse a string and copy it to the output. */
1215
1216static void
1217parse_string (pfile, c)
1218 cpp_reader *pfile;
1219 int c;
1220{
1221 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
1222 const U_CHAR *limit;
1223
1224 skip_string (pfile, c);
1225
1226 limit = CPP_BUFFER (pfile)->cur;
1227 CPP_RESERVE (pfile, limit - start + 2);
1228 CPP_PUTC_Q (pfile, c);
1229 for (; start < limit; start++)
1230 if (*start != '\r')
1231 CPP_PUTC_Q (pfile, *start);
1232}
1233
45b966db
ZW
1234/* Get the next token, and add it to the text in pfile->token_buffer.
1235 Return the kind of token we got. */
1236
3a2b2c7a 1237enum cpp_ttype
45b966db
ZW
1238_cpp_lex_token (pfile)
1239 cpp_reader *pfile;
1240{
5eec0563 1241 register int c, c2;
3a2b2c7a 1242 enum cpp_ttype token;
45b966db 1243
f2d5f0cc
ZW
1244 if (CPP_BUFFER (pfile) == NULL)
1245 return CPP_EOF;
1246
45b966db
ZW
1247 get_next:
1248 c = GETC();
1249 switch (c)
1250 {
1251 case EOF:
1252 return CPP_EOF;
1253
1254 case '/':
1255 if (PEEKC () == '=')
1256 goto op2;
1257
1258 comment:
ae79697b 1259 if (CPP_OPTION (pfile, discard_comments))
45b966db
ZW
1260 c = skip_comment (pfile, c);
1261 else
1262 c = copy_comment (pfile, c);
1263 if (c != ' ')
1264 goto randomchar;
1265
1266 /* Comments are equivalent to spaces.
1267 For -traditional, a comment is equivalent to nothing. */
ff2b53ef 1268 if (!CPP_OPTION (pfile, discard_comments))
45b966db 1269 return CPP_COMMENT;
9e62c811 1270 else if (CPP_TRADITIONAL (pfile))
15dad1d9 1271 goto get_next;
45b966db
ZW
1272 else
1273 {
1274 CPP_PUTC (pfile, c);
1275 return CPP_HSPACE;
1276 }
1277
1278 case '#':
5eec0563
JM
1279 CPP_PUTC (pfile, c);
1280
1281 hash:
15dad1d9
ZW
1282 c2 = PEEKC ();
1283 if (c2 == '#')
45b966db 1284 {
15dad1d9
ZW
1285 FORWARD (1);
1286 CPP_PUTC (pfile, c2);
1287 return CPP_PASTE;
45b966db 1288 }
15dad1d9 1289 else if (c2 == '%' && PEEKN (1) == ':')
45b966db 1290 {
15dad1d9
ZW
1291 /* Digraph: "%:" == "#". */
1292 FORWARD (1);
1293 CPP_RESERVE (pfile, 2);
1294 CPP_PUTC_Q (pfile, c2);
1295 CPP_PUTC_Q (pfile, GETC ());
1368ee70 1296 return CPP_PASTE;
45b966db 1297 }
15dad1d9
ZW
1298 else
1299 return CPP_HASH;
45b966db
ZW
1300
1301 case '\"':
1302 case '\'':
1303 parse_string (pfile, c);
45b966db
ZW
1304 return c == '\'' ? CPP_CHAR : CPP_STRING;
1305
1306 case '$':
ae79697b 1307 if (!CPP_OPTION (pfile, dollars_in_ident))
45b966db
ZW
1308 goto randomchar;
1309 goto letter;
1310
1311 case ':':
5eec0563
JM
1312 c2 = PEEKC ();
1313 /* Digraph: ":>" == "]". */
1314 if (c2 == '>'
1315 || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
45b966db
ZW
1316 goto op2;
1317 goto randomchar;
1318
1319 case '&':
1320 case '+':
1321 case '|':
1322 c2 = PEEKC ();
1323 if (c2 == c || c2 == '=')
1324 goto op2;
1325 goto randomchar;
1326
5eec0563
JM
1327 case '%':
1328 /* Digraphs: "%:" == "#", "%>" == "}". */
1329 c2 = PEEKC ();
1330 if (c2 == ':')
1331 {
1332 FORWARD (1);
1333 CPP_RESERVE (pfile, 2);
1334 CPP_PUTC_Q (pfile, c);
1335 CPP_PUTC_Q (pfile, c2);
1336 goto hash;
1337 }
1338 else if (c2 == '>')
1339 {
1340 FORWARD (1);
1341 CPP_RESERVE (pfile, 2);
1342 CPP_PUTC_Q (pfile, c);
1343 CPP_PUTC_Q (pfile, c2);
1368ee70 1344 return CPP_OPEN_BRACE;
5eec0563
JM
1345 }
1346 /* else fall through */
1347
45b966db
ZW
1348 case '*':
1349 case '!':
45b966db
ZW
1350 case '=':
1351 case '^':
1352 if (PEEKC () == '=')
1353 goto op2;
1354 goto randomchar;
1355
1356 case '-':
1357 c2 = PEEKC ();
1358 if (c2 == '-')
1359 {
ae79697b 1360 if (CPP_OPTION (pfile, chill))
45b966db
ZW
1361 goto comment; /* Chill style comment */
1362 else
1363 goto op2;
1364 }
1365 else if (c2 == '=')
1366 goto op2;
1367 else if (c2 == '>')
1368 {
ae79697b 1369 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
45b966db
ZW
1370 {
1371 /* In C++, there's a ->* operator. */
1372 token = CPP_OTHER;
45b966db
ZW
1373 CPP_RESERVE (pfile, 4);
1374 CPP_PUTC_Q (pfile, c);
1375 CPP_PUTC_Q (pfile, GETC ());
1376 CPP_PUTC_Q (pfile, GETC ());
45b966db
ZW
1377 return token;
1378 }
1379 goto op2;
1380 }
1381 goto randomchar;
1382
1383 case '<':
1384 if (pfile->parsing_include_directive)
1385 {
1386 for (;;)
1387 {
1388 CPP_PUTC (pfile, c);
1389 if (c == '>')
1390 break;
1391 c = GETC ();
1392 if (c == '\n' || c == EOF)
1393 {
1394 cpp_error (pfile,
1395 "missing '>' in `#include <FILENAME>'");
1396 break;
1397 }
1398 else if (c == '\r')
1399 {
1400 if (!CPP_BUFFER (pfile)->has_escapes)
1401 {
1402 /* Backslash newline is replaced by nothing. */
1403 CPP_ADJUST_WRITTEN (pfile, -1);
1404 CPP_BUMP_LINE (pfile);
1405 }
1406 else
1407 {
1408 /* We might conceivably get \r- or \r<space> in
1409 here. Just delete 'em. */
1410 int d = GETC();
1411 if (d != '-' && d != ' ')
1412 cpp_ice (pfile, "unrecognized escape \\r%c", d);
1413 CPP_ADJUST_WRITTEN (pfile, -1);
1414 }
1415 }
1416 }
1417 return CPP_STRING;
1418 }
5eec0563
JM
1419 /* Digraphs: "<%" == "{", "<:" == "[". */
1420 c2 = PEEKC ();
1421 if (c2 == '%')
1422 {
1423 FORWARD (1);
1424 CPP_RESERVE (pfile, 2);
1425 CPP_PUTC_Q (pfile, c);
1426 CPP_PUTC_Q (pfile, c2);
1368ee70 1427 return CPP_CLOSE_BRACE;
5eec0563
JM
1428 }
1429 else if (c2 == ':')
1430 goto op2;
45b966db
ZW
1431 /* else fall through */
1432 case '>':
1433 c2 = PEEKC ();
1434 if (c2 == '=')
1435 goto op2;
1436 /* GNU C++ supports MIN and MAX operators <? and >?. */
ae79697b 1437 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
45b966db
ZW
1438 goto randomchar;
1439 FORWARD(1);
5eec0563
JM
1440 CPP_RESERVE (pfile, 3);
1441 CPP_PUTC_Q (pfile, c);
1442 CPP_PUTC_Q (pfile, c2);
1443 if (PEEKC () == '=')
45b966db 1444 CPP_PUTC_Q (pfile, GETC ());
45b966db
ZW
1445 return CPP_OTHER;
1446
1447 case '.':
1448 c2 = PEEKC ();
5eec0563 1449 if (ISDIGIT (c2))
45b966db 1450 {
5eec0563 1451 CPP_PUTC (pfile, c);
45b966db
ZW
1452 c = GETC ();
1453 goto number;
1454 }
1455
1456 /* In C++ there's a .* operator. */
ae79697b 1457 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
45b966db
ZW
1458 goto op2;
1459
1460 if (c2 == '.' && PEEKN(1) == '.')
1461 {
5eec0563 1462 CPP_RESERVE (pfile, 3);
45b966db
ZW
1463 CPP_PUTC_Q (pfile, '.');
1464 CPP_PUTC_Q (pfile, '.');
1465 CPP_PUTC_Q (pfile, '.');
1466 FORWARD (2);
1368ee70 1467 return CPP_ELLIPSIS;
45b966db
ZW
1468 }
1469 goto randomchar;
1470
1471 op2:
5eec0563 1472 CPP_RESERVE (pfile, 2);
45b966db
ZW
1473 CPP_PUTC_Q (pfile, c);
1474 CPP_PUTC_Q (pfile, GETC ());
5eec0563 1475 return CPP_OTHER;
45b966db
ZW
1476
1477 case 'L':
1478 c2 = PEEKC ();
1479 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1480 {
1481 CPP_PUTC (pfile, c);
1482 c = GETC ();
1483 parse_string (pfile, c);
45b966db
ZW
1484 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1485 }
1486 goto letter;
1487
1488 case '0': case '1': case '2': case '3': case '4':
1489 case '5': case '6': case '7': case '8': case '9':
1490 number:
1491 c2 = '.';
1492 for (;;)
1493 {
1494 CPP_RESERVE (pfile, 2);
1495 CPP_PUTC_Q (pfile, c);
1496 c = PEEKC ();
1497 if (c == EOF)
1498 break;
1499 if (!is_numchar(c) && c != '.'
1500 && ((c2 != 'e' && c2 != 'E'
1501 && ((c2 != 'p' && c2 != 'P')
ae79697b 1502 || CPP_OPTION (pfile, c89)))
45b966db
ZW
1503 || (c != '+' && c != '-')))
1504 break;
1505 FORWARD(1);
1506 c2= c;
1507 }
45b966db
ZW
1508 return CPP_NUMBER;
1509 case 'b': case 'c': case 'd': case 'h': case 'o':
1510 case 'B': case 'C': case 'D': case 'H': case 'O':
ae79697b 1511 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
45b966db 1512 {
45b966db
ZW
1513 CPP_RESERVE (pfile, 2);
1514 CPP_PUTC_Q (pfile, c);
1515 CPP_PUTC_Q (pfile, '\'');
1516 FORWARD(1);
1517 for (;;)
1518 {
1519 c = GETC();
1520 if (c == EOF)
1521 goto chill_number_eof;
1522 if (!is_numchar(c))
1523 break;
1524 CPP_PUTC (pfile, c);
1525 }
1526 if (c == '\'')
1527 {
1528 CPP_RESERVE (pfile, 2);
1529 CPP_PUTC_Q (pfile, c);
45b966db
ZW
1530 return CPP_STRING;
1531 }
1532 else
1533 {
1534 FORWARD(-1);
1535 chill_number_eof:
45b966db
ZW
1536 return CPP_NUMBER;
1537 }
1538 }
1539 else
1540 goto letter;
1541 case '_':
1542 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1543 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1544 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1545 case 'x': case 'y': case 'z':
1546 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1547 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1548 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1549 case 'Y': case 'Z':
1550 letter:
45b966db
ZW
1551 _cpp_parse_name (pfile, c);
1552 return CPP_MACRO;
1553
64aaf407
NB
1554 case ' ': case '\t': case '\v': case '\f': case '\0':
1555 {
1556 int null_count = 0;
1557
1558 for (;;)
1559 {
1560 if (c == '\0')
1561 null_count++;
1562 else
1563 CPP_PUTC (pfile, c);
1564 c = PEEKC ();
1565 if (c == EOF || !is_hspace(c))
1566 break;
1567 FORWARD(1);
1568 }
1569 if (null_count)
1570 null_warning (pfile, null_count);
1571 return CPP_HSPACE;
1572 }
45b966db
ZW
1573
1574 case '\r':
1575 if (CPP_BUFFER (pfile)->has_escapes)
1576 {
1577 c = GETC ();
1578 if (c == '-')
1579 {
1580 if (pfile->output_escapes)
1581 CPP_PUTS (pfile, "\r-", 2);
1582 _cpp_parse_name (pfile, GETC ());
1583 return CPP_NAME;
1584 }
1585 else if (c == ' ')
1586 {
ff2b53ef
ZW
1587 /* "\r " means a space, but only if necessary to prevent
1588 accidental token concatenation. */
45b966db
ZW
1589 CPP_RESERVE (pfile, 2);
1590 if (pfile->output_escapes)
1591 CPP_PUTC_Q (pfile, '\r');
1592 CPP_PUTC_Q (pfile, c);
1593 return CPP_HSPACE;
1594 }
1595 else
1596 {
1597 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1598 goto get_next;
1599 }
1600 }
1601 else
1602 {
1603 /* Backslash newline is ignored. */
cbccf5e8
MM
1604 if (!ACTIVE_MARK_P (pfile))
1605 CPP_BUMP_LINE (pfile);
45b966db
ZW
1606 goto get_next;
1607 }
1608
1609 case '\n':
1610 CPP_PUTC (pfile, c);
45b966db
ZW
1611 return CPP_VSPACE;
1612
1368ee70
ZW
1613 case '(': token = CPP_OPEN_PAREN; goto char1;
1614 case ')': token = CPP_CLOSE_PAREN; goto char1;
1615 case '{': token = CPP_OPEN_BRACE; goto char1;
1616 case '}': token = CPP_CLOSE_BRACE; goto char1;
1617 case ',': token = CPP_COMMA; goto char1;
1618 case ';': token = CPP_SEMICOLON; goto char1;
45b966db
ZW
1619
1620 randomchar:
1621 default:
1622 token = CPP_OTHER;
1623 char1:
45b966db
ZW
1624 CPP_PUTC (pfile, c);
1625 return token;
1626 }
1627}
1628
1629/* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1630 Caller is expected to have checked no_macro_expand. */
1631static int
1632maybe_macroexpand (pfile, written)
1633 cpp_reader *pfile;
1634 long written;
1635{
1636 U_CHAR *macro = pfile->token_buffer + written;
1637 size_t len = CPP_WRITTEN (pfile) - written;
f8f769ea 1638 cpp_hashnode *hp = cpp_lookup (pfile, macro, len);
45b966db 1639
f8f769ea 1640 /* cpp_lookup never returns null. */
a7abcbbf 1641 if (hp->type == T_VOID)
45b966db 1642 return 0;
d9e0bd53 1643 if (hp->disabled || hp->type == T_IDENTITY)
45b966db
ZW
1644 {
1645 if (pfile->output_escapes)
1646 {
1647 /* Insert a no-reexpand marker before IDENT. */
1648 CPP_RESERVE (pfile, 2);
1649 CPP_ADJUST_WRITTEN (pfile, 2);
1650 macro = pfile->token_buffer + written;
1651
1652 memmove (macro + 2, macro, len);
1653 macro[0] = '\r';
1654 macro[1] = '-';
1655 }
1656 return 0;
1657 }
ff2b53ef
ZW
1658 if (hp->type == T_EMPTY)
1659 {
1660 /* Special case optimization: macro expands to nothing. */
1661 CPP_SET_WRITTEN (pfile, written);
1662 CPP_PUTC_Q (pfile, ' ');
1663 return 1;
1664 }
45b966db
ZW
1665
1666 /* If macro wants an arglist, verify that a '(' follows. */
d9e0bd53 1667 if (hp->type == T_FMACRO)
45b966db
ZW
1668 {
1669 int macbuf_whitespace = 0;
1670 int c;
1671
1672 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1673 {
1674 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1675 for (;;)
1676 {
1677 _cpp_skip_hspace (pfile);
1678 c = PEEKC ();
1679 if (c == '\n')
1680 FORWARD(1);
1681 else
1682 break;
1683 }
1684 if (point != CPP_BUFFER (pfile)->cur)
1685 macbuf_whitespace = 1;
1686 if (c == '(')
1687 goto is_macro_call;
1688 else if (c != EOF)
1689 goto not_macro_call;
1690 cpp_pop_buffer (pfile);
1691 }
1692
1693 CPP_SET_MARK (pfile);
1694 for (;;)
1695 {
1696 _cpp_skip_hspace (pfile);
1697 c = PEEKC ();
1698 if (c == '\n')
1699 FORWARD(1);
1700 else
1701 break;
1702 }
1703 CPP_GOTO_MARK (pfile);
1704
1705 if (c != '(')
1706 {
1707 not_macro_call:
1708 if (macbuf_whitespace)
1709 CPP_PUTC (pfile, ' ');
476f2869
ZW
1710
1711 /* K+R treated this as a hard error. */
d7a2e0f7 1712 if (CPP_WTRADITIONAL (pfile))
476f2869 1713 cpp_warning (pfile,
d7a2e0f7 1714 "function macro %s must be used with arguments in traditional C",
476f2869 1715 hp->name);
45b966db
ZW
1716 return 0;
1717 }
1718 }
1719
1720 is_macro_call:
1721 /* This is now known to be a macro call.
1722 Expand the macro, reading arguments as needed,
1723 and push the expansion on the input stack. */
1724 _cpp_macroexpand (pfile, hp);
1725 CPP_SET_WRITTEN (pfile, written);
1726 return 1;
1727}
1728
9e62c811
ZW
1729/* Complain about \v or \f in a preprocessing directive (constraint
1730 violation, C99 6.10 para 5). Caller has checked CPP_PEDANTIC. */
1731static void
1732pedantic_whitespace (pfile, p, len)
1733 cpp_reader *pfile;
1734 U_CHAR *p;
1735 unsigned int len;
1736{
1737 while (len)
1738 {
1739 if (*p == '\v')
1740 cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1741 else if (*p == '\f')
1742 cpp_pedwarn (pfile, "form feed in preprocessing directive");
1743 p++;
1744 len--;
1745 }
1746}
1747
1748
3a2b2c7a 1749enum cpp_ttype
45b966db
ZW
1750cpp_get_token (pfile)
1751 cpp_reader *pfile;
1752{
3a2b2c7a 1753 enum cpp_ttype token;
45b966db 1754 long written = CPP_WRITTEN (pfile);
ea4a453b 1755 int macro_buffer;
45b966db
ZW
1756
1757 get_next:
1758 token = _cpp_lex_token (pfile);
1759
1760 switch (token)
1761 {
1762 default:
ea4a453b
ZW
1763 if (pfile->skipping)
1764 break;
ff2b53ef
ZW
1765 pfile->potential_control_macro = 0;
1766 pfile->only_seen_white = 0;
ea4a453b
ZW
1767 break;
1768
1769 case CPP_HSPACE:
1770 case CPP_COMMENT:
1771 break;
ff2b53ef
ZW
1772
1773 case CPP_VSPACE:
1774 if (pfile->only_seen_white == 0)
1775 pfile->only_seen_white = 1;
1776 CPP_BUMP_LINE (pfile);
ea4a453b 1777 break;
45b966db 1778
15dad1d9 1779 case CPP_HASH:
ff2b53ef 1780 pfile->potential_control_macro = 0;
15dad1d9 1781 if (!pfile->only_seen_white)
ea4a453b 1782 break;
15dad1d9
ZW
1783 /* XXX shouldn't have to do this - remove the hash or %: from
1784 the token buffer. */
1785 if (CPP_PWRITTEN (pfile)[-1] == '#')
1786 CPP_ADJUST_WRITTEN (pfile, -1);
1787 else
1788 CPP_ADJUST_WRITTEN (pfile, -2);
1789
45b966db 1790 if (_cpp_handle_directive (pfile))
ea4a453b
ZW
1791 {
1792 token = CPP_DIRECTIVE;
1793 break;
1794 }
45b966db
ZW
1795 pfile->only_seen_white = 0;
1796 CPP_PUTC (pfile, '#');
ea4a453b 1797 break;
45b966db
ZW
1798
1799 case CPP_MACRO:
ea4a453b
ZW
1800 if (pfile->skipping)
1801 break;
ff2b53ef
ZW
1802 pfile->potential_control_macro = 0;
1803 pfile->only_seen_white = 0;
45b966db
ZW
1804 if (! pfile->no_macro_expand
1805 && maybe_macroexpand (pfile, written))
1806 goto get_next;
ea4a453b
ZW
1807 token = CPP_NAME;
1808 break;
45b966db 1809
ea4a453b 1810 /* Do not run this case through the 'skipping' logic. */
45b966db 1811 case CPP_EOF:
f2d5f0cc
ZW
1812 if (CPP_BUFFER (pfile) == NULL)
1813 return CPP_EOF;
ea4a453b
ZW
1814 macro_buffer = CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile));
1815
c56c2073 1816 cpp_pop_buffer (pfile);
ea4a453b
ZW
1817 if (macro_buffer)
1818 goto get_next;
c56c2073 1819 return CPP_EOF;
45b966db 1820 }
ea4a453b
ZW
1821
1822 if (pfile->skipping)
1823 {
1824 CPP_SET_WRITTEN (pfile, written);
1825 goto get_next;
1826 }
1827 return token;
45b966db
ZW
1828}
1829
1830/* Like cpp_get_token, but skip spaces and comments. */
1831
3a2b2c7a 1832enum cpp_ttype
45b966db
ZW
1833cpp_get_non_space_token (pfile)
1834 cpp_reader *pfile;
1835{
1836 int old_written = CPP_WRITTEN (pfile);
1837 for (;;)
1838 {
3a2b2c7a 1839 enum cpp_ttype token = cpp_get_token (pfile);
ff2b53ef 1840 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
45b966db
ZW
1841 return token;
1842 CPP_SET_WRITTEN (pfile, old_written);
1843 }
1844}
1845
ff2b53ef 1846/* Like cpp_get_token, except that it does not execute directives,
c56c2073 1847 does not consume vertical space, and discards horizontal space. */
3a2b2c7a 1848enum cpp_ttype
9e62c811 1849_cpp_get_directive_token (pfile)
45b966db
ZW
1850 cpp_reader *pfile;
1851{
ff2b53ef 1852 long old_written;
3a2b2c7a 1853 enum cpp_ttype token;
57c578a6 1854 int at_bol;
45b966db 1855
ff2b53ef 1856 get_next:
57c578a6 1857 at_bol = (CPP_BUFFER (pfile)->cur == CPP_BUFFER (pfile)->line_base);
ff2b53ef
ZW
1858 old_written = CPP_WRITTEN (pfile);
1859 token = _cpp_lex_token (pfile);
1860 switch (token)
45b966db 1861 {
ff2b53ef
ZW
1862 default:
1863 return token;
45b966db 1864
ff2b53ef
ZW
1865 case CPP_VSPACE:
1866 /* Put it back and return VSPACE. */
1867 FORWARD(-1);
1868 CPP_ADJUST_WRITTEN (pfile, -1);
1869 return CPP_VSPACE;
45b966db 1870
ff2b53ef 1871 case CPP_HSPACE:
57c578a6
ZW
1872 /* The purpose of this rather strange check is to prevent pedantic
1873 warnings for ^L in an #ifdefed out block. */
1874 if (CPP_PEDANTIC (pfile) && ! at_bol)
9e62c811
ZW
1875 pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1876 CPP_WRITTEN (pfile) - old_written);
1877 CPP_SET_WRITTEN (pfile, old_written);
1878 goto get_next;
ff2b53ef 1879 return CPP_HSPACE;
45b966db 1880
ff2b53ef
ZW
1881 case CPP_MACRO:
1882 if (! pfile->no_macro_expand
1883 && maybe_macroexpand (pfile, old_written))
1884 goto get_next;
1885 return CPP_NAME;
45b966db 1886
ff2b53ef
ZW
1887 case CPP_EOF:
1888 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
45b966db 1889 {
ff2b53ef
ZW
1890 cpp_pop_buffer (pfile);
1891 goto get_next;
45b966db 1892 }
ff2b53ef
ZW
1893 else
1894 /* This can happen for files that don't end with a newline,
1895 and for cpp_define and friends. Pretend they do, so
1896 callers don't have to deal. A warning will be issued by
1897 someone else, if necessary. */
1898 return CPP_VSPACE;
1899 }
1900}
1901
45b966db
ZW
1902/* Determine the current line and column. Used only by read_and_prescan. */
1903static U_CHAR *
1904find_position (start, limit, linep)
1905 U_CHAR *start;
1906 U_CHAR *limit;
1907 unsigned long *linep;
1908{
1909 unsigned long line = *linep;
1910 U_CHAR *lbase = start;
1911 while (start < limit)
1912 {
1913 U_CHAR ch = *start++;
1914 if (ch == '\n' || ch == '\r')
1915 {
1916 line++;
1917 lbase = start;
1918 }
1919 }
1920 *linep = line;
1921 return lbase;
1922}
1923
f8f769ea 1924/* The following table is used by _cpp_prescan. If we have
2a87fbe8
ZW
1925 designated initializers, it can be constant data; otherwise, it is
1926 set up at runtime by _cpp_init_input_buffer. */
46d07497 1927
12cf91fe 1928#if (GCC_VERSION >= 2007)
2a87fbe8 1929#define init_chartab() /* nothing */
12cf91fe 1930#define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
46d07497
ZW
1931#define END };
1932#define s(p, v) [p] = v,
1933#else
12cf91fe 1934#define CHARTAB static U_CHAR chartab[UCHAR_MAX + 1] = { 0 }; \
2a87fbe8
ZW
1935 static void init_chartab PARAMS ((void)) { \
1936 unsigned char *x = chartab;
46d07497
ZW
1937#define END }
1938#define s(p, v) x[p] = v;
1939#endif
1940
1941/* Table of characters that can't be handled in the inner loop.
2a87fbe8
ZW
1942 Also contains the mapping between trigraph third characters and their
1943 replacements. */
46d07497
ZW
1944#define SPECCASE_CR 1
1945#define SPECCASE_BACKSLASH 2
1946#define SPECCASE_QUESTION 3
1947
2a87fbe8 1948CHARTAB
46d07497
ZW
1949 s('\r', SPECCASE_CR)
1950 s('\\', SPECCASE_BACKSLASH)
1951 s('?', SPECCASE_QUESTION)
46d07497 1952
46d07497
ZW
1953 s('=', '#') s(')', ']') s('!', '|')
1954 s('(', '[') s('\'', '^') s('>', '}')
1955 s('/', '\\') s('<', '{') s('-', '~')
1956END
1957
1958#undef CHARTAB
46d07497
ZW
1959#undef END
1960#undef s
1961
2a87fbe8
ZW
1962#define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
1963#define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1964
f8f769ea
ZW
1965/* Prescan pass over a file already loaded into BUF. This is
1966 translation phases 1 and 2 (C99 5.1.1.2).
1967
1968 Convert end-of-line markers (\n, \r, \r\n, \n\r) to
45b966db
ZW
1969 canonical form (\n). If enabled, convert and/or warn about
1970 trigraphs. Convert backslash-newline to a one-character escape
1971 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1972 token). If there is no newline at the end of the file, add one and
1973 warn. Returns -1 on failure, or the actual length of the data to
1974 be scanned.
1975
1976 This function does a lot of work, and can be a serious performance
1977 bottleneck. It has been tuned heavily; make sure you understand it
1978 before hacking. The common case - no trigraphs, Unix style line
1979 breaks, backslash-newline set off by whitespace, newline at EOF -
1980 has been optimized at the expense of the others. The performance
1981 penalty for DOS style line breaks (\r\n) is about 15%.
1982
1983 Warnings lose particularly heavily since we have to determine the
1984 line number, which involves scanning from the beginning of the file
1985 or from the last warning. The penalty for the absence of a newline
1986 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1987
1988 If your file has more than one kind of end-of-line marker, you
f8f769ea
ZW
1989 will get messed-up line numbering. */
1990
1991ssize_t
1992_cpp_prescan (pfile, fp, len)
45b966db
ZW
1993 cpp_reader *pfile;
1994 cpp_buffer *fp;
f8f769ea 1995 ssize_t len;
45b966db 1996{
f8f769ea
ZW
1997 U_CHAR *buf, *op;
1998 const U_CHAR *ibase, *ip, *ilimit;
1999 U_CHAR *line_base;
45b966db
ZW
2000 unsigned long line;
2001 unsigned int deferred_newlines;
45b966db 2002
f8f769ea
ZW
2003 /* Allocate an extra byte in case we must add a trailing \n. */
2004 buf = (U_CHAR *) xmalloc (len + 1);
2005 line_base = op = buf;
2006 ip = ibase = fp->buf;
2007 ilimit = ibase + len;
45b966db 2008 line = 1;
f8f769ea 2009 deferred_newlines = 0;
45b966db
ZW
2010
2011 for (;;)
2012 {
f8f769ea 2013 const U_CHAR *iq;
04e3ec78 2014
f8f769ea
ZW
2015 /* Deal with \-newline, potentially in the middle of a token. */
2016 if (deferred_newlines)
45b966db 2017 {
f8f769ea 2018 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
04e3ec78 2019 {
f8f769ea
ZW
2020 /* Previous was not white space. Skip to white
2021 space, if we can, before outputting the \r's */
2022 iq = ip;
2023 while (iq < ilimit
2024 && *iq != ' '
2025 && *iq != '\t'
2026 && *iq != '\n'
2027 && NORMAL(*iq))
2028 iq++;
2029 memcpy (op, ip, iq - ip);
2030 op += iq - ip;
2031 ip += iq - ip;
2032 if (! NORMAL(*ip))
2033 goto do_speccase;
04e3ec78 2034 }
f8f769ea
ZW
2035 while (deferred_newlines)
2036 deferred_newlines--, *op++ = '\r';
45b966db
ZW
2037 }
2038
f8f769ea
ZW
2039 /* Copy as much as we can without special treatment. */
2040 iq = ip;
2041 while (iq < ilimit && NORMAL (*iq)) iq++;
2042 memcpy (op, ip, iq - ip);
2043 op += iq - ip;
2044 ip += iq - ip;
45b966db 2045
f8f769ea
ZW
2046 do_speccase:
2047 if (ip >= ilimit)
2048 break;
2049
2050 switch (chartab[*ip++])
2051 {
2052 case SPECCASE_CR: /* \r */
2053 if (ip[-2] != '\n')
45b966db 2054 {
f8f769ea
ZW
2055 if (ip < ilimit && *ip == '\n')
2056 ip++;
2057 *op++ = '\n';
45b966db 2058 }
f8f769ea 2059 break;
45b966db 2060
f8f769ea
ZW
2061 case SPECCASE_BACKSLASH: /* \ */
2062 backslash:
2063 if (ip < ilimit)
45b966db 2064 {
04e3ec78 2065 if (*ip == '\n')
45b966db 2066 {
04e3ec78 2067 deferred_newlines++;
45b966db
ZW
2068 ip++;
2069 if (*ip == '\r') ip++;
f8f769ea 2070 break;
45b966db
ZW
2071 }
2072 else if (*ip == '\r')
2073 {
04e3ec78 2074 deferred_newlines++;
45b966db
ZW
2075 ip++;
2076 if (*ip == '\n') ip++;
f8f769ea 2077 break;
45b966db 2078 }
f8f769ea 2079 }
45b966db 2080
f8f769ea
ZW
2081 *op++ = '\\';
2082 break;
04e3ec78 2083
f8f769ea
ZW
2084 case SPECCASE_QUESTION: /* ? */
2085 {
2086 unsigned int d, t;
45b966db 2087
f8f769ea
ZW
2088 *op++ = '?'; /* Normal non-trigraph case */
2089 if (ip > ilimit - 2 || ip[0] != '?')
2090 break;
2091
2092 d = ip[1];
2093 t = chartab[d];
2094 if (NONTRI (t))
2095 break;
04e3ec78 2096
f8f769ea
ZW
2097 if (CPP_OPTION (pfile, warn_trigraphs))
2098 {
2099 unsigned long col;
2100 line_base = find_position (line_base, op, &line);
2101 col = op - line_base + 1;
ae79697b 2102 if (CPP_OPTION (pfile, trigraphs))
f8f769ea
ZW
2103 cpp_warning_with_line (pfile, line, col,
2104 "trigraph ??%c converted to %c", d, t);
45b966db 2105 else
f8f769ea
ZW
2106 cpp_warning_with_line (pfile, line, col,
2107 "trigraph ??%c ignored", d);
2108 }
2109
2110 ip += 2;
2111 if (CPP_OPTION (pfile, trigraphs))
2112 {
2113 op[-1] = t; /* Overwrite '?' */
2114 if (t == '\\')
45b966db 2115 {
f8f769ea
ZW
2116 op--;
2117 goto backslash;
45b966db
ZW
2118 }
2119 }
f8f769ea
ZW
2120 else
2121 {
2122 *op++ = '?';
2123 *op++ = d;
2124 }
2125 }
2126 break;
45b966db
ZW
2127 }
2128 }
2129
f8f769ea
ZW
2130#ifdef HAVE_MMAP_FILE
2131 if (fp->mapped)
2132 munmap ((caddr_t) fp->buf, len);
2133 else
2134#endif
2135 free ((PTR) fp->buf);
45b966db 2136
45b966db
ZW
2137 if (op[-1] != '\n')
2138 {
2139 unsigned long col;
2140 line_base = find_position (line_base, op, &line);
2141 col = op - line_base + 1;
f6fab919 2142 cpp_warning_with_line (pfile, line, col, "no newline at end of file");
45b966db
ZW
2143 *op++ = '\n';
2144 }
2145
f8f769ea 2146 fp->buf = buf;
45b966db 2147 return op - buf;
45b966db
ZW
2148}
2149
2a87fbe8
ZW
2150/* Allocate pfile->input_buffer, and initialize chartab[]
2151 if it hasn't happened already. */
46d07497 2152
45b966db
ZW
2153void
2154_cpp_init_input_buffer (pfile)
2155 cpp_reader *pfile;
2156{
2157 U_CHAR *tmp;
2158
2a87fbe8 2159 init_chartab ();
d1d9a6bd 2160 _cpp_init_toklist (&pfile->directbuf, NO_DUMMY_TOKEN);
04e3ec78 2161
45b966db
ZW
2162 /* Determine the appropriate size for the input buffer. Normal C
2163 source files are smaller than eight K. */
04e3ec78
NB
2164 /* 8Kbytes of buffer proper, 1 to detect running off the end without
2165 address arithmetic all the time, and 3 for pushback during buffer
2166 refill, in case there's a potential trigraph or end-of-line
2167 digraph at the end of a block. */
45b966db 2168
04e3ec78 2169 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
45b966db
ZW
2170 pfile->input_buffer = tmp;
2171 pfile->input_buffer_len = 8192;
2172}
c5a04734 2173
6d2c2047
ZW
2174/* Utility routine:
2175 Compares, in the manner of strcmp(3), the token beginning at TOKEN
2176 and extending for LEN characters to the NUL-terminated string
2177 STRING. Typical usage:
2178
2179 if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
2180 "inline"))
2181 { ... }
2182 */
2183
2184int
2185cpp_idcmp (token, len, string)
2186 const U_CHAR *token;
2187 size_t len;
2188 const char *string;
2189{
2190 size_t len2 = strlen (string);
2191 int r;
2192
2193 if ((r = memcmp (token, string, MIN (len, len2))))
2194 return r;
2195
2196 /* The longer of the two strings sorts after the shorter. */
2197 if (len == len2)
2198 return 0;
2199 else if (len < len2)
2200 return -1;
2201 else
2202 return 1;
2203}
2204
b8f41010 2205#ifdef NEW_LEXER
c5a04734 2206
d6d5f795
NB
2207/* Lexing algorithm.
2208
2209 The original lexer in cpplib was made up of two passes: a first pass
2210 that replaced trigraphs and deleted escaped newlines, and a second
2211 pass that tokenized the result of the first pass. Tokenisation was
2212 performed by peeking at the next character in the input stream. For
6777db6d 2213 example, if the input stream contained "!=", the handler for the !
d6d5f795 2214 character would peek at the next character, and if it were a '='
6777db6d
NB
2215 would skip over it, and return a "!=" token, otherwise it would
2216 return just the "!" token.
d6d5f795
NB
2217
2218 To implement a single-pass lexer, this peeking ahead is unworkable.
2219 An arbitrary number of escaped newlines, and trigraphs (in particular
6777db6d
NB
2220 ??/ which translates to the escape \), could separate the '!' and '='
2221 in the input stream, yet the next token is still a "!=".
d6d5f795
NB
2222
2223 Suppose instead that we lex by one logical line at a time, producing
6777db6d
NB
2224 a token list or stack for each logical line, and when seeing the '!'
2225 push a CPP_NOT token on the list. Then if the '!' is part of a
2226 longer token ("!=") we know we must see the remainder of the token by
2227 the time we reach the end of the logical line. Thus we can have the
2228 '=' handler look at the previous token (at the end of the list / top
2229 of the stack) and see if it is a "!" token, and if so, instead of
2230 pushing a "=" token revise the existing token to be a "!=" token.
d6d5f795
NB
2231
2232 This works in the presence of escaped newlines, because the '\' would
2233 have been pushed on the top of the stack as a CPP_BACKSLASH. The
2234 newline ('\n' or '\r') handler looks at the token at the top of the
2235 stack to see if it is a CPP_BACKSLASH, and if so discards both.
2236 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
2237 the '=' handler would never see any intervening escaped newlines.
2238
2239 To make trigraphs work in this context, as in precedence trigraphs
2240 are highest and converted before anything else, the '?' handler does
2241 lookahead to see if it is a trigraph, and if so skips the trigraph
2242 and pushes the token it represents onto the top of the stack. This
2243 also works in the particular case of a CPP_BACKSLASH trigraph.
2244
2245 To the preprocessor, whitespace is only significant to the point of
2246 knowing whether whitespace precedes a particular token. For example,
2247 the '=' handler needs to know whether there was whitespace between it
6777db6d 2248 and a "!" token on the top of the stack, to make the token conversion
d6d5f795
NB
2249 decision correctly. So each token has a PREV_WHITESPACE flag to
2250 indicate this - the standard permits consecutive whitespace to be
2251 regarded as a single space. The compiler front ends are not
2252 interested in whitespace at all; they just require a token stream.
2253 Another place where whitespace is significant to the preprocessor is
2254 a #define statement - if there is whitespace between the macro name
2255 and an initial "(" token the macro is "object-like", otherwise it is
2256 a function-like macro that takes arguments.
2257
2258 However, all is not rosy. Parsing of identifiers, numbers, comments
2259 and strings becomes trickier because of the possibility of raw
2260 trigraphs and escaped newlines in the input stream.
2261
2262 The trigraphs are three consecutive characters beginning with two
c2e25d51
NB
2263 question marks. A question mark is not valid as part of a number or
2264 identifier, so parsing of a number or identifier terminates normally
2265 upon reaching it, returning to the mainloop which handles the
2266 trigraph just like it would in any other position. Similarly for the
2267 backslash of a backslash-newline combination. So we just need the
2268 escaped-newline dropper in the mainloop to check if the token on the
2269 top of the stack after dropping the escaped newline is a number or
2270 identifier, and if so to continue processing it as if nothing had
2271 happened.
d6d5f795
NB
2272
2273 For strings, we replace trigraphs whenever we reach a quote or
2274 newline, because there might be a backslash trigraph escaping them.
2275 We need to be careful that we start trigraph replacing from where we
2276 left off previously, because it is possible for a first scan to leave
2277 "fake" trigraphs that a second scan would pick up as real (e.g. the
c2e25d51 2278 sequence "????/\n=" would find a fake ??= trigraph after removing the
d6d5f795
NB
2279 escaped newline.)
2280
2281 For line comments, on reaching a newline we scan the previous
2282 character(s) to see if it is escaped, and continue if it is. Block
2283 comments ignore everything and just focus on finding the comment
2284 termination mark. The only difficult thing, and it is surprisingly
2285 tricky, is checking if an asterisk precedes the final slash since
2286 they could be separated by escaped newlines. If the preprocessor is
2287 invoked with the output comments option, we don't bother removing
2288 escaped newlines and replacing trigraphs for output.
2289
2290 Finally, numbers can begin with a period, which is pushed initially
2291 as a CPP_DOT token in its own right. The digit handler checks if the
2292 previous token was a CPP_DOT not separated by whitespace, and if so
2293 pops it off the stack and pushes a period into the number's buffer
2294 before calling the number parser.
2295
2296*/
2297
b8f41010
NB
/* Spellings of the six digraph forms.  The order of the entries is
   significant to whatever code indexes this table; do not reorder.  */
static const unsigned char *digraph_spellings [] = {
  U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>"
};

/* Indexed by the third character of a candidate trigraph "??x".
   Holds the character the trigraph stands for, or zero when "??x" is
   not a trigraph.  Filled in by init_trigraph_map ().  */
static unsigned char trigraph_map[256];
c5a04734 2301
c5a04734
ZW
2302void
2303init_trigraph_map ()
2304{
2305 trigraph_map['='] = '#';
2306 trigraph_map['('] = '[';
2307 trigraph_map[')'] = ']';
2308 trigraph_map['/'] = '\\';
2309 trigraph_map['\''] = '^';
2310 trigraph_map['<'] = '{';
2311 trigraph_map['>'] = '}';
2312 trigraph_map['!'] = '|';
2313 trigraph_map['-'] = '~';
2314}
2315
2316/* Call when a trigraph is encountered. It warns if necessary, and
2317 returns true if the trigraph should be honoured. END is the third
2318 character of a trigraph in the input stream. */
2319static int
2320trigraph_ok (pfile, end)
2321 cpp_reader *pfile;
2322 const unsigned char *end;
2323{
2324 int accept = CPP_OPTION (pfile, trigraphs);
2325
2326 if (CPP_OPTION (pfile, warn_trigraphs))
2327 {
2328 unsigned int col = end - 1 - pfile->buffer->line_base;
2329 if (accept)
2330 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2331 "trigraph ??%c converted to %c",
2332 (int) *end, (int) trigraph_map[*end]);
2333 else
2334 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2335 "trigraph ??%c ignored", (int) *end);
2336 }
2337 return accept;
2338}
2339
2340/* Scan a string for trigraphs, warning or replacing them inline as
2341 appropriate. When parsing a string, we must call this routine
2342 before processing a newline character (if trigraphs are enabled),
2343 since the newline might be escaped by a preceding backslash
2344 trigraph sequence. Returns a pointer to the end of the name after
2345 replacement. */
2346
2347static unsigned char*
2348trigraph_replace (pfile, src, limit)
2349 cpp_reader *pfile;
2350 unsigned char *src;
2351 unsigned char* limit;
2352{
2353 unsigned char *dest;
2354
2355 /* Starting with src[1], find two consecutive '?'. The case of no
2356 trigraphs is streamlined. */
2357
2358 for (; src + 1 < limit; src += 2)
2359 {
2360 if (src[0] != '?')
2361 continue;
2362
2363 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
2364 if (src[-1] == '?')
2365 src--;
2366 else if (src + 2 == limit || src[1] != '?')
2367 continue;
2368
2369 /* Check if it really is a trigraph. */
2370 if (trigraph_map[src[2]] == 0)
2371 continue;
2372
2373 dest = src;
2374 goto trigraph_found;
2375 }
2376 return limit;
2377
2378 /* Now we have a trigraph, we need to scan the remaining buffer, and
2379 copy-shifting its contents left if replacement is enabled. */
2380 for (; src + 2 < limit; dest++, src++)
2381 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2382 {
2383 trigraph_found:
2384 src += 2;
2385 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2386 *dest = trigraph_map[*src];
2387 }
2388
2389 /* Copy remaining (at most 2) characters. */
2390 while (src < limit)
2391 *dest++ = *src++;
2392 return dest;
2393}
2394
2395/* If CUR is a backslash or the end of a trigraphed backslash, return
2396 a pointer to its beginning, otherwise NULL. We don't read beyond
2397 the buffer start, because there is the start of the comment in the
2398 buffer. */
2399static const unsigned char *
2400backslash_start (pfile, cur)
2401 cpp_reader *pfile;
2402 const unsigned char *cur;
2403{
2404 if (cur[0] == '\\')
2405 return cur;
2406 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2407 && trigraph_ok (pfile, cur))
2408 return cur - 2;
2409 return 0;
2410}
2411
2412/* Skip a C-style block comment. This is probably the trickiest
2413 handler. We find the end of the comment by seeing if an asterisk
2414 is before every '/' we encounter. The nasty complication is that a
2415 previous asterisk may be separated by one or more escaped newlines.
2416 Returns non-zero if comment terminated by EOF, zero otherwise. */
2417static int
b8f41010 2418skip_block_comment2 (pfile)
c5a04734
ZW
2419 cpp_reader *pfile;
2420{
2421 cpp_buffer *buffer = pfile->buffer;
2422 const unsigned char *char_after_star = 0;
2423 register const unsigned char *cur = buffer->cur;
2424 int seen_eof = 0;
2425
2426 /* Inner loop would think the comment has ended if the first comment
2427 character is a '/'. Avoid this and keep the inner loop clean by
2428 skipping such a character. */
2429 if (cur < buffer->rlimit && cur[0] == '/')
2430 cur++;
2431
2432 for (; cur < buffer->rlimit; )
2433 {
2434 unsigned char c = *cur++;
2435
2436 /* People like decorating comments with '*', so check for
2437 '/' instead for efficiency. */
2438 if (c == '/')
2439 {
2440 if (cur[-2] == '*' || cur - 1 == char_after_star)
2441 goto out;
2442
2443 /* Warn about potential nested comments, but not when
2444 the final character inside the comment is a '/'.
2445 Don't bother to get it right across escaped newlines. */
2446 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2447 && cur[0] == '*' && cur[1] != '/')
2448 {
2449 buffer->cur = cur;
2450 cpp_warning (pfile, "'/*' within comment");
2451 }
2452 }
2453 else if (IS_NEWLINE(c))
2454 {
2455 const unsigned char* bslash = backslash_start (pfile, cur - 2);
2456
2457 handle_newline (cur, buffer->rlimit, c);
2458 /* Work correctly if there is an asterisk before an
2459 arbirtrarily long sequence of escaped newlines. */
2460 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2461 char_after_star = cur;
2462 else
2463 char_after_star = 0;
2464 }
2465 }
2466 seen_eof = 1;
2467
2468 out:
2469 buffer->cur = cur;
2470 return seen_eof;
2471}
2472
2473/* Skip a C++ or Chill line comment. Handles escaped newlines.
2474 Returns non-zero if a multiline comment. */
2475static int
b8f41010 2476skip_line_comment2 (pfile)
c5a04734
ZW
2477 cpp_reader *pfile;
2478{
2479 cpp_buffer *buffer = pfile->buffer;
2480 register const unsigned char *cur = buffer->cur;
2481 int multiline = 0;
2482
2483 for (; cur < buffer->rlimit; )
2484 {
2485 unsigned char c = *cur++;
2486
2487 if (IS_NEWLINE (c))
2488 {
2489 /* Check for a (trigaph?) backslash escaping the newline. */
2490 if (!backslash_start (pfile, cur - 2))
2491 goto out;
2492 multiline = 1;
2493 handle_newline (cur, buffer->rlimit, c);
2494 }
2495 }
2496 cur++;
2497
2498 out:
2499 buffer->cur = cur - 1; /* Leave newline for caller. */
2500 return multiline;
2501}
2502
6ab3e7dd
NB
2503/* Skips whitespace, stopping at next non-whitespace character.
2504 Adjusts pfile->col_adjust to account for tabs. This enables tokens
2505 to be assigned the correct column. */
c5a04734
ZW
2506static void
2507skip_whitespace (pfile, in_directive)
2508 cpp_reader *pfile;
2509 int in_directive;
2510{
2511 cpp_buffer *buffer = pfile->buffer;
2512 register const unsigned char *cur = buffer->cur;
2513 unsigned short null_count = 0;
2514
2515 for (; cur < buffer->rlimit; )
2516 {
2517 unsigned char c = *cur++;
2518
6ab3e7dd
NB
2519 if (c == '\t')
2520 {
2521 unsigned int col = CPP_BUF_COLUMN (buffer, cur - 1);
2522 pfile->col_adjust += (CPP_OPTION (pfile, tabstop) - 1
2523 - col % CPP_OPTION(pfile, tabstop));
2524 }
c5a04734
ZW
2525 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
2526 continue;
2527 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
2528 goto out;
2529 if (c == '\0')
2530 null_count++;
2531 /* Mut be '\f' or '\v' */
2532 else if (in_directive && CPP_PEDANTIC (pfile))
2533 cpp_pedwarn (pfile, "%s in preprocessing directive",
2534 c == '\f' ? "formfeed" : "vertical tab");
2535 }
2536 cur++;
2537
2538 out:
2539 buffer->cur = cur - 1;
2540 if (null_count)
2541 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2542 : "embedded null character ignored");
2543}
2544
2545/* Parse (append) an identifier. */
2546static void
2547parse_name (pfile, list, name)
2548 cpp_reader *pfile;
2549 cpp_toklist *list;
2550 cpp_name *name;
2551{
2552 const unsigned char *name_limit;
2553 unsigned char *namebuf;
2554 cpp_buffer *buffer = pfile->buffer;
2555 register const unsigned char *cur = buffer->cur;
2556
2557 expanded:
2558 name_limit = list->namebuf + list->name_cap;
2559 namebuf = list->namebuf + list->name_used;
2560
2561 for (; cur < buffer->rlimit && namebuf < name_limit; )
2562 {
2563 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2564
2565 if (! is_idchar(c))
2566 goto out;
2567 namebuf++;
2568 cur++;
2569 if (c == '$' && CPP_PEDANTIC (pfile))
2570 {
2571 buffer->cur = cur;
2572 cpp_pedwarn (pfile, "'$' character in identifier");
2573 }
2574 }
2575
2576 /* Run out of name space? */
2577 if (cur < buffer->rlimit)
2578 {
2579 list->name_used = namebuf - list->namebuf;
2580 auto_expand_name_space (list);
2581 goto expanded;
2582 }
2583
2584 out:
2585 buffer->cur = cur;
f617b8e2 2586 name->len = namebuf - name->text;
c5a04734
ZW
2587 list->name_used = namebuf - list->namebuf;
2588}
2589
2590/* Parse (append) a number. */
2591
2592#define VALID_SIGN(c, prevc) \
2593 (((c) == '+' || (c) == '-') && \
2594 ((prevc) == 'e' || (prevc) == 'E' \
2595 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2596
2597static void
2598parse_number (pfile, list, name)
2599 cpp_reader *pfile;
2600 cpp_toklist *list;
2601 cpp_name *name;
2602{
2603 const unsigned char *name_limit;
2604 unsigned char *namebuf;
2605 cpp_buffer *buffer = pfile->buffer;
2606 register const unsigned char *cur = buffer->cur;
2607
2608 expanded:
2609 name_limit = list->namebuf + list->name_cap;
2610 namebuf = list->namebuf + list->name_used;
2611
2612 for (; cur < buffer->rlimit && namebuf < name_limit; )
2613 {
2614 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2615
2616 /* Perhaps we should accept '$' here if we accept it for
2617 identifiers. We know namebuf[-1] is safe, because for c to
2618 be a sign we must have pushed at least one character. */
2619 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2620 goto out;
2621
2622 namebuf++;
2623 cur++;
2624 }
2625
2626 /* Run out of name space? */
2627 if (cur < buffer->rlimit)
2628 {
2629 list->name_used = namebuf - list->namebuf;
2630 auto_expand_name_space (list);
2631 goto expanded;
2632 }
2633
2634 out:
2635 buffer->cur = cur;
f617b8e2 2636 name->len = namebuf - name->text;
c5a04734
ZW
2637 list->name_used = namebuf - list->namebuf;
2638}
2639
2640/* Places a string terminated by an unescaped TERMINATOR into a
2641 cpp_name, which should be expandable and thus at the top of the
2642 list's stack. Handles embedded trigraphs, if necessary, and
2643 escaped newlines.
2644
2645 Can be used for character constants (terminator = '\''), string
41e8b1d7
NB
2646 constants ('"') and angled headers ('>'). Multi-line strings are
2647 allowed, except for within directives. */
c5a04734
ZW
2648
2649static void
d1d9a6bd 2650parse_string2 (pfile, list, name, terminator, multiline_ok)
c5a04734
ZW
2651 cpp_reader *pfile;
2652 cpp_toklist *list;
2653 cpp_name *name;
2654 unsigned int terminator;
d1d9a6bd 2655 int multiline_ok;
c5a04734
ZW
2656{
2657 cpp_buffer *buffer = pfile->buffer;
2658 register const unsigned char *cur = buffer->cur;
2659 const unsigned char *name_limit;
2660 unsigned char *namebuf;
2661 unsigned int null_count = 0;
2662 int trigraphed_len = 0;
2663
2664 expanded:
2665 name_limit = list->namebuf + list->name_cap;
2666 namebuf = list->namebuf + list->name_used;
2667
2668 for (; cur < buffer->rlimit && namebuf < name_limit; )
2669 {
2670 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
2671
2672 if (c == '\0')
2673 null_count++;
2674 else if (c == terminator || IS_NEWLINE (c))
2675 {
c5a04734
ZW
2676 /* Needed for trigraph_replace and multiline string warning. */
2677 buffer->cur = cur;
2678
2679 /* Scan for trigraphs before checking if backslash-escaped. */
2680 if (CPP_OPTION (pfile, trigraphs)
2681 || CPP_OPTION (pfile, warn_trigraphs))
2682 {
f617b8e2 2683 namebuf = trigraph_replace (pfile, name->text + trigraphed_len,
c5a04734 2684 namebuf);
f617b8e2 2685 trigraphed_len = namebuf - 2 - (name->text + trigraphed_len);
c5a04734
ZW
2686 if (trigraphed_len < 0)
2687 trigraphed_len = 0;
2688 }
2689
2690 namebuf--; /* Drop the newline / terminator from the name. */
2691 if (IS_NEWLINE (c))
2692 {
2693 /* Drop a backslash newline, and continue. */
2694 if (namebuf[-1] == '\\')
2695 {
2696 handle_newline (cur, buffer->rlimit, c);
2697 namebuf--;
2698 continue;
2699 }
2700
2701 cur--;
2702
2703 /* In Fortran and assembly language, silently terminate
2704 strings of either variety at end of line. This is a
2705 kludge around not knowing where comments are in these
2706 languages. */
2707 if (CPP_OPTION (pfile, lang_fortran)
2708 || CPP_OPTION (pfile, lang_asm))
2709 goto out;
2710
2711 /* Character constants, headers and asserts may not
2712 extend over multiple lines. In Standard C, neither
2713 may strings. We accept multiline strings as an
2714 extension, but not in directives. */
d1d9a6bd 2715 if (!multiline_ok)
c5a04734
ZW
2716 goto unterminated;
2717
2718 cur++; /* Move forwards again. */
2719
2720 if (pfile->multiline_string_line == 0)
2721 {
2722 pfile->multiline_string_line = list->line;
2723 if (CPP_PEDANTIC (pfile))
2724 cpp_pedwarn (pfile, "multi-line string constant");
2725 }
2726
2727 *namebuf++ = '\n';
2728 handle_newline (cur, buffer->rlimit, c);
2729 }
2730 else
2731 {
2732 unsigned char *temp;
2733
2734 /* An odd number of consecutive backslashes represents
2735 an escaped terminator. */
2736 temp = namebuf - 1;
f617b8e2 2737 while (temp >= name->text && *temp == '\\')
c5a04734
ZW
2738 temp--;
2739
2740 if ((namebuf - temp) & 1)
2741 goto out;
2742 namebuf++;
2743 }
2744 }
2745 }
2746
2747 /* Run out of name space? */
2748 if (cur < buffer->rlimit)
2749 {
2750 list->name_used = namebuf - list->namebuf;
2751 auto_expand_name_space (list);
2752 goto expanded;
2753 }
2754
2755 /* We may not have trigraph-replaced the input for this code path,
2756 but as the input is in error by being unterminated we don't
2757 bother. Prevent warnings about no newlines at EOF. */
2758 if (IS_NEWLINE(cur[-1]))
2759 cur--;
2760
2761 unterminated:
2762 cpp_error (pfile, "missing terminating %c character", (int) terminator);
2763
2764 if (terminator == '\"' && pfile->multiline_string_line != list->line
2765 && pfile->multiline_string_line != 0)
2766 {
2767 cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2768 "possible start of unterminated string literal");
2769 pfile->multiline_string_line = 0;
2770 }
2771
2772 out:
2773 buffer->cur = cur;
f617b8e2 2774 name->len = namebuf - name->text;
c5a04734
ZW
2775 list->name_used = namebuf - list->namebuf;
2776
2777 if (null_count > 0)
2778 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2779 : "null character preserved"));
2780}
2781
5d7ee2fa
NB
2782/* The character TYPE helps us distinguish comment types: '*' = C
2783 style, '-' = Chill-style and '/' = C++ style. For code simplicity,
2784 the stored comment includes the comment start and any terminator. */
2785
2786#define COMMENT_START_LEN 2
c5a04734 2787static void
ad265aa4 2788save_comment (list, token, from, len, type)
c5a04734 2789 cpp_toklist *list;
ad265aa4 2790 cpp_token *token;
c5a04734
ZW
2791 const unsigned char *from;
2792 unsigned int len;
c5a04734
ZW
2793 unsigned int type;
2794{
5d7ee2fa
NB
2795 unsigned char *buffer;
2796
2797 len += COMMENT_START_LEN;
c5a04734 2798
c5a04734
ZW
2799 if (list->name_used + len > list->name_cap)
2800 expand_name_space (list, len);
2801
ad265aa4
NB
2802 INIT_TOKEN_NAME (list, token);
2803 token->type = CPP_COMMENT;
2804 token->val.name.len = len;
c5a04734 2805
d1d9a6bd
NB
2806 buffer = list->namebuf + list->name_used;
2807 list->name_used += len;
2808
2809 /* Copy the comment. */
5d7ee2fa
NB
2810 if (type == '*')
2811 {
2812 *buffer++ = '/';
2813 *buffer++ = '*';
2814 }
2815 else
2816 {
2817 *buffer++ = type;
2818 *buffer++ = type;
2819 }
5d7ee2fa 2820 memcpy (buffer, from, len - COMMENT_START_LEN);
c5a04734
ZW
2821}
2822
2823/*
2824 * The tokenizer's main loop. Returns a token list, representing a
f624ffa7
NB
2825 * logical line in the input file. On EOF after some tokens have
2826 * been processed, we return immediately. Then in next call, or if
2827 * EOF occurred at the beginning of a logical line, a single CPP_EOF
2828 * token is placed in the list.
c5a04734
ZW
2829 *
2830 * Implementation relies almost entirely on lookback, rather than
2831 * looking forwards. This means that tokenization requires just
2832 * a single pass of the file, even in the presence of trigraphs and
2833 * escaped newlines, providing significant performance benefits.
2834 * Trigraph overhead is negligible if they are disabled, and low
2835 * even when enabled.
2836 */
2837
d1d9a6bd
NB
2838#define IS_DIRECTIVE() (list->tokens[first_token].type == CPP_HASH)
2839
c5a04734
ZW
2840void
2841_cpp_lex_line (pfile, list)
2842 cpp_reader *pfile;
2843 cpp_toklist *list;
2844{
2845 cpp_token *cur_token, *token_limit;
2846 cpp_buffer *buffer = pfile->buffer;
2847 register const unsigned char *cur = buffer->cur;
2848 unsigned char flags = 0;
d1d9a6bd 2849 unsigned int first_token = list->tokens_used;
c5a04734 2850
f624ffa7 2851 list->line = CPP_BUF_LINE (buffer);
6ab3e7dd 2852 pfile->col_adjust = 0;
c5a04734
ZW
2853 expanded:
2854 token_limit = list->tokens + list->tokens_cap;
2855 cur_token = list->tokens + list->tokens_used;
2856
2857 for (; cur < buffer->rlimit && cur_token < token_limit;)
2858 {
2859 unsigned char c = *cur++;
2860
6ab3e7dd
NB
2861 /* Optimize whitespace skipping, as most tokens are probably
2862 separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
2863
c5a04734
ZW
2864 if (is_hspace ((unsigned int) c))
2865 {
6ab3e7dd
NB
2866 /* Step back to get the null warning and tab correction. */
2867 buffer->cur = cur - 1;
d1d9a6bd 2868 skip_whitespace (pfile, IS_DIRECTIVE ());
6ab3e7dd
NB
2869 cur = buffer->cur;
2870
c5a04734
ZW
2871 flags = PREV_WHITESPACE;
2872 if (cur == buffer->rlimit)
2873 break;
2874 c = *cur++;
2875 }
2876
2877 /* Initialize current token. Its type is set in the switch. */
6ab3e7dd 2878 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
c5a04734
ZW
2879 cur_token->flags = flags;
2880 flags = 0;
2881
2882 switch (c)
2883 {
2884 case '0': case '1': case '2': case '3': case '4':
2885 case '5': case '6': case '7': case '8': case '9':
d1d9a6bd
NB
2886 {
2887 int prev_dot;
c5a04734 2888
d1d9a6bd
NB
2889 cur--; /* Backup character. */
2890 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
2891 if (prev_dot)
2892 cur_token--;
2893 INIT_TOKEN_NAME (list, cur_token);
2894 /* Prepend an immediately previous CPP_DOT token. */
2895 if (prev_dot)
2896 {
2897 if (list->name_cap == list->name_used)
2898 auto_expand_name_space (list);
c5a04734 2899
d1d9a6bd
NB
2900 cur_token->val.name.len = 1;
2901 list->namebuf[list->name_used++] = '.';
2902 }
c5a04734 2903
d1d9a6bd
NB
2904 continue_number:
2905 cur_token->type = CPP_NUMBER; /* Before parse_number. */
2906 buffer->cur = cur;
2907 parse_number (pfile, list, &cur_token->val.name);
2908 cur = buffer->cur;
2909 cur_token++;
2910 }
c5a04734
ZW
2911 break;
2912
2913 letter:
2914 case '_':
2915 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2916 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2917 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2918 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2919 case 'y': case 'z':
2920 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2921 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2922 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2923 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2924 case 'Y': case 'Z':
c5a04734 2925 cur--; /* Backup character. */
d1d9a6bd 2926 INIT_TOKEN_NAME (list, cur_token);
c5a04734
ZW
2927 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
2928
2929 continue_name:
2930 buffer->cur = cur;
2931 parse_name (pfile, list, &cur_token->val.name);
2932 cur = buffer->cur;
2933
2934 /* Find handler for newly created / extended directive. */
d1d9a6bd 2935 if (IS_DIRECTIVE () && cur_token == &list->tokens[first_token + 1])
c5a04734
ZW
2936 _cpp_check_directive (list, cur_token);
2937 cur_token++;
2938 break;
2939
2940 case '\'':
2941 /* Fall through. */
2942 case '\"':
2943 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2944 /* Do we have a wide string? */
2945 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2946 && cur_token[-1].val.name.len == 1
f617b8e2 2947 && cur_token[-1].val.name.text[0] == 'L'
c5a04734
ZW
2948 && !CPP_TRADITIONAL (pfile))
2949 {
2950 /* No need for 'L' any more. */
2951 list->name_used--;
2952 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
2953 }
2954
2955 do_parse_string:
6ab3e7dd 2956 /* Here c is one of ' " or >. */
d1d9a6bd 2957 INIT_TOKEN_NAME (list, cur_token);
c5a04734 2958 buffer->cur = cur;
d1d9a6bd
NB
2959 parse_string2 (pfile, list, &cur_token->val.name, c,
2960 c == '"' && !IS_DIRECTIVE());
c5a04734
ZW
2961 cur = buffer->cur;
2962 cur_token++;
2963 break;
2964
2965 case '/':
2966 cur_token->type = CPP_DIV;
2967 if (IMMED_TOKEN ())
2968 {
2969 if (PREV_TOKEN_TYPE == CPP_DIV)
2970 {
2971 /* We silently allow C++ comments in system headers,
2972 irrespective of conformance mode, because lots of
2973 broken systems do that and trying to clean it up
2974 in fixincludes is a nightmare. */
c31a6508 2975 if (CPP_IN_SYSTEM_HEADER (pfile))
c5a04734
ZW
2976 goto do_line_comment;
2977 else if (CPP_OPTION (pfile, cplusplus_comments))
2978 {
2979 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
2980 && ! buffer->warned_cplusplus_comments)
2981 {
2982 buffer->cur = cur;
2983 cpp_pedwarn (pfile,
2984 "C++ style comments are not allowed in ISO C89");
2985 cpp_pedwarn (pfile,
2986 "(this will be reported only once per input file)");
2987 buffer->warned_cplusplus_comments = 1;
2988 }
2989 do_line_comment:
2990 buffer->cur = cur;
2991 if (cur[-2] != c)
2992 cpp_warning (pfile,
2993 "comment start split across lines");
b8f41010 2994 if (skip_line_comment2 (pfile))
c5a04734
ZW
2995 cpp_error_with_line (pfile, list->line,
2996 cur_token[-1].col,
2997 "multi-line comment");
c5a04734
ZW
2998
2999 /* Back-up to first '-' or '/'. */
ad265aa4
NB
3000 cur_token--;
3001 if (!CPP_OPTION (pfile, discard_comments)
3002 && (!IS_DIRECTIVE() || list->dirno == 0))
3003 save_comment (list, cur_token++, cur,
3004 buffer->cur - cur, c);
3005 cur = buffer->cur;
3006
c5a04734
ZW
3007 if (!CPP_OPTION (pfile, traditional))
3008 flags = PREV_WHITESPACE;
ad265aa4 3009 break;
c5a04734
ZW
3010 }
3011 }
3012 }
3013 cur_token++;
3014 break;
3015
3016 case '*':
3017 cur_token->type = CPP_MULT;
3018 if (IMMED_TOKEN ())
3019 {
3020 if (PREV_TOKEN_TYPE == CPP_DIV)
3021 {
3022 buffer->cur = cur;
3023 if (cur[-2] != '/')
3024 cpp_warning (pfile,
3025 "comment start '/*' split across lines");
b8f41010 3026 if (skip_block_comment2 (pfile))
c5a04734
ZW
3027 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
3028 "unterminated comment");
3029 else if (buffer->cur[-2] != '*')
3030 cpp_warning (pfile,
3031 "comment end '*/' split across lines");
c5a04734 3032
ad265aa4 3033 /* Back up to opening '/'. */
f617b8e2 3034 cur_token--;
ad265aa4
NB
3035 if (!CPP_OPTION (pfile, discard_comments)
3036 && (!IS_DIRECTIVE() || list->dirno == 0))
3037 save_comment (list, cur_token++, cur,
3038 buffer->cur - cur, c);
3039 cur = buffer->cur;
3040
c5a04734
ZW
3041 if (!CPP_OPTION (pfile, traditional))
3042 flags = PREV_WHITESPACE;
f617b8e2 3043 break;
c5a04734
ZW
3044 }
3045 else if (CPP_OPTION (pfile, cplusplus))
3046 {
3047 /* In C++, there are .* and ->* operators. */
3048 if (PREV_TOKEN_TYPE == CPP_DEREF)
3049 BACKUP_TOKEN (CPP_DEREF_STAR);
3050 else if (PREV_TOKEN_TYPE == CPP_DOT)
3051 BACKUP_TOKEN (CPP_DOT_STAR);
3052 }
3053 }
3054 cur_token++;
3055 break;
3056
3057 case '\n':
3058 case '\r':
3059 handle_newline (cur, buffer->rlimit, c);
fb4527c3 3060 if (PREV_TOKEN_TYPE == CPP_BACKSLASH && IMMED_TOKEN ())
c5a04734 3061 {
fb4527c3
NB
3062 /* Remove the escaped newline. Then continue to process
3063 any interrupted name or number. */
3064 cur_token--;
3065 if (IMMED_TOKEN ())
c5a04734 3066 {
fb4527c3
NB
3067 cur_token--;
3068 if (cur_token->type == CPP_NAME)
3069 goto continue_name;
3070 else if (cur_token->type == CPP_NUMBER)
3071 goto continue_number;
3072 cur_token++;
c5a04734 3073 }
fb4527c3
NB
3074 /* Remember whitespace setting. */
3075 flags = cur_token->flags;
3076 break;
c5a04734 3077 }
fb4527c3 3078 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
c5a04734 3079 {
fb4527c3
NB
3080 buffer->cur = cur;
3081 cpp_warning (pfile, "backslash and newline separated by space");
c5a04734 3082 }
f624ffa7
NB
3083 /* Skip vertical space until we have at least one token to
3084 return. */
3085 if (cur_token != &list->tokens[first_token])
3086 goto out;
3087 list->line = CPP_BUF_LINE (buffer);
3088 break;
c5a04734
ZW
3089
3090 case '-':
3091 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3092 {
3093 if (CPP_OPTION (pfile, chill))
3094 goto do_line_comment;
3095 REVISE_TOKEN (CPP_MINUS_MINUS);
3096 }
3097 else
3098 PUSH_TOKEN (CPP_MINUS);
3099 break;
3100
3101 /* The digraph flag checking ensures that ## and %:%:
3102 are interpreted as CPP_PASTE, but #%: and %:# are not. */
3103 make_hash:
3104 case '#':
3105 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3106 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3107 REVISE_TOKEN (CPP_PASTE);
3108 else
3109 PUSH_TOKEN (CPP_HASH);
3110 break;
3111
3112 case ':':
3113 cur_token->type = CPP_COLON;
3114 if (IMMED_TOKEN ())
3115 {
3116 if (PREV_TOKEN_TYPE == CPP_COLON
3117 && CPP_OPTION (pfile, cplusplus))
3118 BACKUP_TOKEN (CPP_SCOPE);
3119 /* Digraph: "<:" is a '[' */
3120 else if (PREV_TOKEN_TYPE == CPP_LESS)
3121 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3122 /* Digraph: "%:" is a '#' */
3123 else if (PREV_TOKEN_TYPE == CPP_MOD)
3124 {
3125 (--cur_token)->flags |= DIGRAPH;
3126 goto make_hash;
3127 }
3128 }
3129 cur_token++;
3130 break;
3131
3132 case '&':
3133 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3134 REVISE_TOKEN (CPP_AND_AND);
3135 else
3136 PUSH_TOKEN (CPP_AND);
3137 break;
3138
3139 make_or:
3140 case '|':
3141 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3142 REVISE_TOKEN (CPP_OR_OR);
3143 else
3144 PUSH_TOKEN (CPP_OR);
3145 break;
3146
3147 case '+':
3148 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3149 REVISE_TOKEN (CPP_PLUS_PLUS);
3150 else
3151 PUSH_TOKEN (CPP_PLUS);
3152 break;
3153
3154 case '=':
3155 /* This relies on equidistance of "?=" and "?" tokens. */
3156 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3157 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3158 else
3159 PUSH_TOKEN (CPP_EQ);
3160 break;
3161
3162 case '>':
3163 cur_token->type = CPP_GREATER;
3164 if (IMMED_TOKEN ())
3165 {
3166 if (PREV_TOKEN_TYPE == CPP_GREATER)
3167 BACKUP_TOKEN (CPP_RSHIFT);
3168 else if (PREV_TOKEN_TYPE == CPP_MINUS)
3169 BACKUP_TOKEN (CPP_DEREF);
3170 /* Digraph: ":>" is a ']' */
3171 else if (PREV_TOKEN_TYPE == CPP_COLON)
3172 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3173 /* Digraph: "%>" is a '}' */
3174 else if (PREV_TOKEN_TYPE == CPP_MOD)
3175 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
3176 }
3177 cur_token++;
3178 break;
3179
3180 case '<':
3181 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3182 {
3183 REVISE_TOKEN (CPP_LSHIFT);
3184 break;
3185 }
3186 /* Is this the beginning of a header name? */
15dad1d9 3187 if (list->flags & SYNTAX_INCLUDE)
c5a04734
ZW
3188 {
3189 c = '>'; /* Terminator. */
3190 cur_token->type = CPP_HEADER_NAME;
3191 goto do_parse_string;
3192 }
3193 PUSH_TOKEN (CPP_LESS);
3194 break;
3195
3196 case '%':
3197 /* Digraph: "<%" is a '{' */
3198 cur_token->type = CPP_MOD;
3199 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3200 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
3201 cur_token++;
3202 break;
3203
c5a04734
ZW
3204 case '?':
3205 if (cur + 1 < buffer->rlimit && *cur == '?'
3206 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3207 {
3208 /* Handle trigraph. */
3209 cur++;
3210 switch (*cur++)
3211 {
3212 case '(': goto make_open_square;
3213 case ')': goto make_close_square;
3214 case '<': goto make_open_brace;
3215 case '>': goto make_close_brace;
3216 case '=': goto make_hash;
3217 case '!': goto make_or;
3218 case '-': goto make_complement;
3219 case '/': goto make_backslash;
3220 case '\'': goto make_xor;
3221 }
3222 }
3223 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3224 {
3225 /* GNU C++ defines <? and >? operators. */
3226 if (PREV_TOKEN_TYPE == CPP_LESS)
3227 {
3228 REVISE_TOKEN (CPP_MIN);
3229 break;
3230 }
3231 else if (PREV_TOKEN_TYPE == CPP_GREATER)
3232 {
3233 REVISE_TOKEN (CPP_MAX);
3234 break;
3235 }
3236 }
3237 PUSH_TOKEN (CPP_QUERY);
3238 break;
3239
3240 case '.':
3241 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3242 && IMMED_TOKEN ()
3243 && !(cur_token[-1].flags & PREV_WHITESPACE))
3244 {
3245 cur_token -= 2;
3246 PUSH_TOKEN (CPP_ELLIPSIS);
3247 }
3248 else
3249 PUSH_TOKEN (CPP_DOT);
3250 break;
3251
cfd5b8b8
NB
3252 make_complement:
3253 case '~': PUSH_TOKEN (CPP_COMPL); break;
c5a04734
ZW
3254 make_xor:
3255 case '^': PUSH_TOKEN (CPP_XOR); break;
3256 make_open_brace:
3257 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3258 make_close_brace:
3259 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3260 make_open_square:
3261 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3262 make_close_square:
3263 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3264 make_backslash:
3265 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3266 case '!': PUSH_TOKEN (CPP_NOT); break;
3267 case ',': PUSH_TOKEN (CPP_COMMA); break;
3268 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
41e8b1d7 3269 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
cfd5b8b8 3270 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
c5a04734
ZW
3271
3272 case '$':
3273 if (CPP_OPTION (pfile, dollars_in_ident))
3274 goto letter;
3275 /* Fall through */
3276 default:
3277 cur_token->aux = c;
f617b8e2 3278 cur_token->val.name.len = 0; /* FIXME: needed for transition only */
c5a04734
ZW
3279 PUSH_TOKEN (CPP_OTHER);
3280 break;
3281 }
3282 }
3283
3284 /* Run out of token space? */
3285 if (cur_token == token_limit)
3286 {
3287 list->tokens_used = cur_token - list->tokens;
d1d9a6bd 3288 _cpp_expand_token_space (list, 256);
c5a04734
ZW
3289 goto expanded;
3290 }
3291
c5a04734 3292 cur_token->flags = flags;
f624ffa7 3293 if (cur_token == &list->tokens[first_token])
c5a04734 3294 {
f624ffa7
NB
3295 /* FIXME: move this warning to callers who care. */
3296 if (cur > buffer->buf && !IS_NEWLINE (cur[-1]))
3297 cpp_warning (pfile, "no newline at end of file");
3298 cur_token++->type = CPP_EOF;
c5a04734
ZW
3299 }
3300
3301 out:
f624ffa7 3302 list->tokens[first_token].flags |= BOL;
c5a04734 3303 buffer->cur = cur;
c5a04734 3304 list->tokens_used = cur_token - list->tokens;
c5a04734
ZW
3305}
3306
3fef5b2b
NB
3307/* Write the spelling of a token TOKEN to BUFFER. The buffer must
3308 already contain the enough space to hold the token's spelling. If
3309 WHITESPACE is true, and the token was preceded by whitespace,
3310 output a single space before the token proper. Returns a pointer
3311 to the character after the last character written. */
3312
3313static unsigned char *
f617b8e2 3314spell_token (pfile, token, buffer, whitespace)
3fef5b2b 3315 cpp_reader *pfile; /* Would be nice to be rid of this... */
d1d9a6bd 3316 const cpp_token *token;
3fef5b2b
NB
3317 unsigned char *buffer;
3318 int whitespace;
3319{
3320 /* Whitespace will not be wanted by handlers of the # and ##
3321 operators calling this function, but will be wanted by the
3322 function that writes out the preprocessed file. */
3323 if (whitespace && token->flags & PREV_WHITESPACE)
3324 *buffer++ = ' ';
3325
3326 switch (token_spellings[token->type].type)
3327 {
5d7ee2fa 3328 case SPELL_OPERATOR:
3fef5b2b
NB
3329 {
3330 const unsigned char *spelling;
3331 unsigned char c;
3332
3333 if (token->flags & DIGRAPH)
3334 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
3335 else
f617b8e2 3336 spelling = token_spellings[token->type].spelling;
3fef5b2b
NB
3337
3338 while ((c = *spelling++) != '\0')
3339 *buffer++ = c;
3340 }
3341 break;
3342
5d7ee2fa 3343 case SPELL_IDENT:
f617b8e2 3344 memcpy (buffer, token->val.name.text, token->val.name.len);
5d7ee2fa
NB
3345 buffer += token->val.name.len;
3346 break;
3347
3348 case SPELL_STRING:
3fef5b2b 3349 {
5d7ee2fa 3350 unsigned char c;
3fef5b2b 3351
5d7ee2fa
NB
3352 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
3353 *buffer++ = 'L';
3354 c = '\'';
3355 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
3356 c = '"';
3357 *buffer++ = c;
f617b8e2 3358 memcpy (buffer, token->val.name.text, token->val.name.len);
5d7ee2fa
NB
3359 buffer += token->val.name.len;
3360 *buffer++ = c;
3fef5b2b
NB
3361 }
3362 break;
3363
3364 case SPELL_CHAR:
3365 *buffer++ = token->aux;
3366 break;
3367
3368 case SPELL_NONE:
3369 cpp_ice (pfile, "Unspellable token");
3370 break;
3371 }
3372
3373 return buffer;
3374}
3375
3376/* Temporary function for illustrative purposes. */
c5a04734
ZW
3377void
3378_cpp_lex_file (pfile)
3379 cpp_reader* pfile;
3380{
c5a04734
ZW
3381 cpp_toklist* list;
3382
3383 init_trigraph_map ();
3384 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
d1d9a6bd 3385 _cpp_init_toklist (list, DUMMY_TOKEN);
c5a04734 3386
15dad1d9 3387 for (;;)
c5a04734 3388 {
c5a04734
ZW
3389 _cpp_lex_line (pfile, list);
3390 if (list->tokens[0].type == CPP_EOF)
3391 break;
3392
15dad1d9
ZW
3393#if 0
3394 if (list->dirno)
3395 _cpp_handle_directive (pfile, list);
c5a04734 3396 else
15dad1d9 3397#endif
c5a04734 3398 _cpp_output_list (pfile, list);
15dad1d9 3399 _cpp_clear_toklist (list);
c5a04734
ZW
3400 }
3401}
3402
b8f41010 3403/* Temporary function for illustrative purposes. */
c5a04734
ZW
3404static void
3405_cpp_output_list (pfile, list)
3406 cpp_reader *pfile;
3407 cpp_toklist *list;
3408{
f624ffa7 3409 unsigned int i;
c5a04734 3410
f624ffa7 3411 for (i = 0; i < list->tokens_used; i++)
c5a04734 3412 {
f624ffa7
NB
3413 CPP_RESERVE (pfile, TOKEN_LEN (&list->tokens[i]));
3414 pfile->limit = spell_token (pfile, &list->tokens[i], pfile->limit, 1);
c5a04734
ZW
3415 }
3416}
3417
3418#endif
This page took 0.495273 seconds and 5 git commands to generate.