]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
configure.in (stage1_warn_cflags): Add -Wstrict-prototypes -Wmissing-prototypes.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23#include "config.h"
24#include "system.h"
25#include "intl.h"
26#include "cpplib.h"
27#include "cpphash.h"
28
ff2b53ef
ZW
29#define PEEKBUF(BUFFER, N) \
30 ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
31#define GETBUF(BUFFER) \
32 ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
33#define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))
34
35#define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
36#define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
37#define GETC() GETBUF (CPP_BUFFER (pfile))
38#define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
45b966db
ZW
39
40static void skip_block_comment PARAMS ((cpp_reader *));
41static void skip_line_comment PARAMS ((cpp_reader *));
42static int maybe_macroexpand PARAMS ((cpp_reader *, long));
43static int skip_comment PARAMS ((cpp_reader *, int));
44static int copy_comment PARAMS ((cpp_reader *, int));
45static void skip_string PARAMS ((cpp_reader *, int));
46static void parse_string PARAMS ((cpp_reader *, int));
47static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
64aaf407 48static void null_warning PARAMS ((cpp_reader *, unsigned int));
45b966db 49
f2d5f0cc
ZW
50static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
51 size_t, FILE *));
1368ee70
ZW
52static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
53 unsigned int));
54static void bump_column PARAMS ((cpp_printer *, unsigned int,
55 unsigned int));
c5a04734 56static void expand_name_space PARAMS ((cpp_toklist *, unsigned int));
1368ee70 57static void expand_token_space PARAMS ((cpp_toklist *));
9e62c811
ZW
58static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
59 unsigned int));
f2d5f0cc 60
c5a04734 61#define auto_expand_name_space(list) \
f617b8e2 62 expand_name_space ((list), 1 + (list)->name_cap / 2)
c5a04734 63
b8f41010
NB
64#ifdef NEW_LEXER
65
66static void expand_comment_space PARAMS ((cpp_toklist *));
67void init_trigraph_map PARAMS ((void));
68static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
69 unsigned char *));
70static const unsigned char *backslash_start PARAMS ((cpp_reader *,
71 const unsigned char *));
72static int skip_block_comment2 PARAMS ((cpp_reader *));
73static int skip_line_comment2 PARAMS ((cpp_reader *));
74static void skip_whitespace PARAMS ((cpp_reader *, int));
75static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
76static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
77static void parse_string2 PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
78 unsigned int));
79static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
80static void save_comment PARAMS ((cpp_toklist *, const unsigned char *,
81 unsigned int, unsigned int, unsigned int));
82void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
83
84static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
85
3fef5b2b 86static unsigned char * spell_token PARAMS ((cpp_reader *, cpp_token *,
f617b8e2 87 unsigned char *, int));
b8f41010
NB
88
89typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
90 cpp_token *));
91
92/* Macros on a cpp_name. */
93#define INIT_NAME(list, name) \
f617b8e2
NB
94 do {(name).len = 0; \
95 (name).text = (list)->namebuf + (list)->name_used;} while (0)
b8f41010
NB
96
97#define IS_DIRECTIVE(list) (TOK_TYPE (list, 0) == CPP_HASH)
b8f41010
NB
98
99/* Maybe put these in the ISTABLE eventually. */
100#define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
101#define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
102
103/* Handle LF, CR, CR-LF and LF-CR style newlines. Assumes next
104 character, if any, is in buffer. */
105#define handle_newline(cur, limit, c) \
106 do {\
107 if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
108 (cur)++; \
109 CPP_BUMP_LINE_CUR (pfile, (cur)); \
6ab3e7dd 110 pfile->col_adjust = 0; \
b8f41010
NB
111 } while (0)
112
113#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
114#define PREV_TOKEN_TYPE (cur_token[-1].type)
115
f617b8e2
NB
116#define PUSH_TOKEN(ttype) cur_token++->type = ttype
117#define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
118#define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
119#define BACKUP_DIGRAPH(ttype) do { \
120 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
121
122/* An upper bound on the number of bytes needed to spell a token,
123 including preceding whitespace. */
124#define TOKEN_LEN(token) (5 + (token_spellings[token->type].type > \
125 SPELL_NONE ? token->val.name.len: 0))
126
127#endif
128
5d7ee2fa
NB
129/* Order here matters. Those beyond SPELL_NONE store their spelling
130 in the token list, and its length in token->val.name.len. */
131#define SPELL_OPERATOR 0
f617b8e2
NB
132#define SPELL_CHAR 2 /* FIXME: revert order after transition. */
133#define SPELL_NONE 1
5d7ee2fa
NB
134#define SPELL_IDENT 3
135#define SPELL_STRING 4
136
f617b8e2 137#define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
5d7ee2fa
NB
138#define I(e, s) {SPELL_IDENT, s},
139#define S(e, s) {SPELL_STRING, s},
b8f41010
NB
140#define C(e, s) {SPELL_CHAR, s},
141#define N(e, s) {SPELL_NONE, s},
b8f41010
NB
142
143static const struct token_spelling
144{
f617b8e2
NB
145 U_CHAR type;
146 const U_CHAR *spelling;
b8f41010
NB
147} token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
148
149#undef T
5d7ee2fa
NB
150#undef I
151#undef S
b8f41010
NB
152#undef C
153#undef N
b8f41010 154
45b966db
ZW
155/* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
156
157void
158_cpp_grow_token_buffer (pfile, n)
159 cpp_reader *pfile;
160 long n;
161{
162 long old_written = CPP_WRITTEN (pfile);
163 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
164 pfile->token_buffer = (U_CHAR *)
165 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
166 CPP_SET_WRITTEN (pfile, old_written);
167}
168
45b966db
ZW
169/* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
170 If BUFFER != NULL, then use the LENGTH characters in BUFFER
171 as the new input buffer.
172 Return the new buffer, or NULL on failure. */
173
174cpp_buffer *
175cpp_push_buffer (pfile, buffer, length)
176 cpp_reader *pfile;
177 const U_CHAR *buffer;
178 long length;
179{
180 cpp_buffer *buf = CPP_BUFFER (pfile);
181 cpp_buffer *new;
182 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
183 {
184 cpp_fatal (pfile, "macro or `#include' recursion too deep");
185 return NULL;
186 }
187
188 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
189
190 new->if_stack = pfile->if_stack;
45b966db 191 new->buf = new->cur = buffer;
ff2b53ef 192 new->rlimit = buffer + length;
45b966db 193 new->prev = buf;
ff2b53ef 194 new->mark = NULL;
45b966db
ZW
195 new->line_base = NULL;
196
197 CPP_BUFFER (pfile) = new;
198 return new;
199}
200
201cpp_buffer *
202cpp_pop_buffer (pfile)
203 cpp_reader *pfile;
204{
205 cpp_buffer *buf = CPP_BUFFER (pfile);
206 if (ACTIVE_MARK_P (pfile))
207 cpp_ice (pfile, "mark active in cpp_pop_buffer");
c56c2073
ZW
208
209 if (buf->ihash)
210 {
211 _cpp_unwind_if_stack (pfile, buf);
212 if (buf->buf)
213 free ((PTR) buf->buf);
214 if (pfile->system_include_depth)
215 pfile->system_include_depth--;
216 if (pfile->potential_control_macro)
217 {
218 buf->ihash->control_macro = pfile->potential_control_macro;
219 pfile->potential_control_macro = 0;
220 }
221 pfile->input_stack_listing_current = 0;
222 }
223 else if (buf->macro)
224 {
225 HASHNODE *m = buf->macro;
226
227 m->disabled = 0;
228 if ((m->type == T_FMACRO && buf->mapped)
229 || m->type == T_SPECLINE || m->type == T_FILE
230 || m->type == T_BASE_FILE || m->type == T_INCLUDE_LEVEL
231 || m->type == T_STDC)
232 free ((PTR) buf->buf);
233 }
45b966db
ZW
234 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
235 free (buf);
236 pfile->buffer_stack_depth--;
237 return CPP_BUFFER (pfile);
238}
239
f2d5f0cc
ZW
240/* Deal with the annoying semantics of fwrite. */
241static void
242safe_fwrite (pfile, buf, len, fp)
243 cpp_reader *pfile;
244 const U_CHAR *buf;
245 size_t len;
246 FILE *fp;
247{
248 size_t count;
45b966db 249
f2d5f0cc
ZW
250 while (len)
251 {
252 count = fwrite (buf, 1, len, fp);
253 if (count == 0)
254 goto error;
255 len -= count;
256 buf += count;
257 }
258 return;
259
260 error:
261 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
262}
263
264/* Notify the compiler proper that the current line number has jumped,
265 or the current file name has changed. */
266
267static void
1368ee70 268output_line_command (pfile, print, line)
45b966db 269 cpp_reader *pfile;
f2d5f0cc 270 cpp_printer *print;
1368ee70 271 unsigned int line;
45b966db 272{
1368ee70 273 cpp_buffer *ip = cpp_file_buffer (pfile);
f2d5f0cc
ZW
274 enum { same = 0, enter, leave, rname } change;
275 static const char * const codes[] = { "", " 1", " 2", "" };
276
277 if (CPP_OPTION (pfile, no_line_commands))
278 return;
279
f2d5f0cc
ZW
280 /* Determine whether the current filename has changed, and if so,
281 how. 'nominal_fname' values are unique, so they can be compared
282 by comparing pointers. */
283 if (ip->nominal_fname == print->last_fname)
284 change = same;
285 else
45b966db 286 {
f2d5f0cc
ZW
287 if (pfile->buffer_stack_depth == print->last_bsd)
288 change = rname;
289 else
45b966db 290 {
f2d5f0cc
ZW
291 if (pfile->buffer_stack_depth > print->last_bsd)
292 change = enter;
293 else
294 change = leave;
295 print->last_bsd = pfile->buffer_stack_depth;
45b966db 296 }
f2d5f0cc 297 print->last_fname = ip->nominal_fname;
45b966db 298 }
f2d5f0cc
ZW
299 /* If the current file has not changed, we can output a few newlines
300 instead if we want to increase the line number by a small amount.
301 We cannot do this if print->lineno is zero, because that means we
302 haven't output any line commands yet. (The very first line
303 command output is a `same_file' command.) */
304 if (change == same && print->lineno != 0
305 && line >= print->lineno && line < print->lineno + 8)
45b966db 306 {
f2d5f0cc 307 while (line > print->lineno)
45b966db 308 {
f2d5f0cc
ZW
309 putc ('\n', print->outf);
310 print->lineno++;
45b966db 311 }
f2d5f0cc 312 return;
45b966db 313 }
f2d5f0cc
ZW
314
315#ifndef NO_IMPLICIT_EXTERN_C
316 if (CPP_OPTION (pfile, cplusplus))
317 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
318 codes[change],
319 ip->system_header_p ? " 3" : "",
320 (ip->system_header_p == 2) ? " 4" : "");
321 else
322#endif
323 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
324 codes[change],
325 ip->system_header_p ? " 3" : "");
326 print->lineno = line;
327}
328
329/* Write the contents of the token_buffer to the output stream, and
330 clear the token_buffer. Also handles generating line commands and
331 keeping track of file transitions. */
332
333void
334cpp_output_tokens (pfile, print)
335 cpp_reader *pfile;
336 cpp_printer *print;
337{
1368ee70
ZW
338 cpp_buffer *ip;
339
f6fab919
ZW
340 if (CPP_WRITTEN (pfile) - print->written)
341 {
342 if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
343 print->lineno++;
344 safe_fwrite (pfile, pfile->token_buffer,
345 CPP_WRITTEN (pfile) - print->written, print->outf);
346 }
1368ee70
ZW
347
348 ip = cpp_file_buffer (pfile);
349 if (ip)
350 output_line_command (pfile, print, CPP_BUF_LINE (ip));
351
f2d5f0cc 352 CPP_SET_WRITTEN (pfile, print->written);
45b966db
ZW
353}
354
1368ee70
ZW
355/* Helper for cpp_output_list - increases the column number to match
356 what we expect it to be. */
357
358static void
359bump_column (print, from, to)
360 cpp_printer *print;
361 unsigned int from, to;
362{
363 unsigned int tabs, spcs;
364 unsigned int delta = to - from;
365
366 /* Only if FROM is 0, advance by tabs. */
367 if (from == 0)
368 tabs = delta / 8, spcs = delta % 8;
369 else
370 tabs = 0, spcs = delta;
371
372 while (tabs--) putc ('\t', print->outf);
373 while (spcs--) putc (' ', print->outf);
374}
375
376/* Write out the list L onto pfile->token_buffer. This function is
377 incomplete:
378
379 1) pfile->token_buffer is not going to continue to exist.
380 2) At the moment, tokens don't carry the information described
381 in cpplib.h; they are all strings.
382 3) The list has to be a complete line, and has to be written starting
383 at the beginning of a line. */
384
385void
386cpp_output_list (pfile, print, list)
387 cpp_reader *pfile;
388 cpp_printer *print;
389 const cpp_toklist *list;
390{
391 unsigned int i;
392 unsigned int curcol = 1;
393
394 /* XXX Probably does not do what is intended. */
395 if (print->lineno != list->line)
396 output_line_command (pfile, print, list->line);
397
398 for (i = 0; i < list->tokens_used; i++)
399 {
1920de47 400 if (TOK_TYPE (list, i) == CPP_VSPACE)
1368ee70
ZW
401 {
402 output_line_command (pfile, print, list->tokens[i].aux);
403 continue;
404 }
405
1920de47 406 if (curcol < TOK_COL (list, i))
1368ee70
ZW
407 {
408 /* Insert space to bring the column to what it should be. */
1920de47
ZW
409 bump_column (print, curcol - 1, TOK_COL (list, i));
410 curcol = TOK_COL (list, i);
1368ee70
ZW
411 }
412 /* XXX We may have to insert space to prevent an accidental
413 token paste. */
1920de47
ZW
414 safe_fwrite (pfile, TOK_NAME (list, i), TOK_LEN (list, i), print->outf);
415 curcol += TOK_LEN (list, i);
1368ee70
ZW
416 }
417}
418
f2d5f0cc
ZW
419/* Scan a string (which may have escape marks), perform macro expansion,
420 and write the result to the token_buffer. */
45b966db
ZW
421
422void
f2d5f0cc 423_cpp_expand_to_buffer (pfile, buf, length)
45b966db
ZW
424 cpp_reader *pfile;
425 const U_CHAR *buf;
426 int length;
427{
c56c2073 428 cpp_buffer *stop;
f2d5f0cc 429 enum cpp_ttype token;
f6fab919 430 U_CHAR *buf1;
45b966db
ZW
431
432 if (length < 0)
433 {
434 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
435 return;
436 }
437
f6fab919
ZW
438 /* Copy the buffer, because it might be in an unsafe place - for
439 example, a sequence on the token_buffer, where the pointers will
440 be invalidated if we enlarge the token_buffer. */
441 buf1 = alloca (length);
442 memcpy (buf1, buf, length);
443
45b966db 444 /* Set up the input on the input stack. */
c56c2073
ZW
445 stop = CPP_BUFFER (pfile);
446 if (cpp_push_buffer (pfile, buf1, length) == NULL)
45b966db 447 return;
c56c2073 448 CPP_BUFFER (pfile)->has_escapes = 1;
45b966db
ZW
449
450 /* Scan the input, create the output. */
f2d5f0cc
ZW
451 for (;;)
452 {
453 token = cpp_get_token (pfile);
c56c2073 454 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
f2d5f0cc 455 break;
f2d5f0cc 456 }
45b966db
ZW
457}
458
c56c2073 459/* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
f2d5f0cc
ZW
460
461void
462cpp_scan_buffer_nooutput (pfile)
463 cpp_reader *pfile;
464{
c56c2073 465 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
f2d5f0cc
ZW
466 enum cpp_ttype token;
467 unsigned int old_written = CPP_WRITTEN (pfile);
468 /* In no-output mode, we can ignore everything but directives. */
469 for (;;)
470 {
471 if (! pfile->only_seen_white)
472 _cpp_skip_rest_of_line (pfile);
473 token = cpp_get_token (pfile);
c56c2073 474 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
f2d5f0cc 475 break;
f2d5f0cc
ZW
476 }
477 CPP_SET_WRITTEN (pfile, old_written);
478}
479
c56c2073 480/* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
f2d5f0cc
ZW
481
482void
483cpp_scan_buffer (pfile, print)
484 cpp_reader *pfile;
485 cpp_printer *print;
486{
c56c2073 487 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
f2d5f0cc
ZW
488 enum cpp_ttype token;
489
490 for (;;)
491 {
492 token = cpp_get_token (pfile);
15dad1d9 493 if (token == CPP_VSPACE || token == CPP_EOF
f2d5f0cc
ZW
494 /* XXX Temporary kluge - force flush after #include only */
495 || (token == CPP_DIRECTIVE
496 && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
497 {
498 cpp_output_tokens (pfile, print);
c56c2073 499 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
f2d5f0cc 500 return;
f2d5f0cc
ZW
501 }
502 }
503}
504
45b966db
ZW
505/* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
506
507cpp_buffer *
508cpp_file_buffer (pfile)
509 cpp_reader *pfile;
510{
511 cpp_buffer *ip;
512
513 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
514 if (ip->ihash != NULL)
515 return ip;
516 return NULL;
517}
518
1368ee70
ZW
519/* Token-buffer helper functions. */
520
521/* Expand a token list's string space. */
522static void
c5a04734 523expand_name_space (list, len)
1368ee70 524 cpp_toklist *list;
c5a04734
ZW
525 unsigned int len;
526{
f617b8e2 527 const U_CHAR *old_namebuf;
f617b8e2
NB
528
529 old_namebuf = list->namebuf;
c5a04734
ZW
530 list->name_cap += len;
531 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
f617b8e2
NB
532
533 /* Fix up token text pointers. */
79f50f2a 534 if (list->namebuf != old_namebuf)
f617b8e2
NB
535 {
536 unsigned int i;
537
538 for (i = 0; i < list->tokens_used; i++)
539 if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
79f50f2a 540 list->tokens[i].val.name.text += (list->namebuf - old_namebuf);
f617b8e2 541 }
1368ee70
ZW
542}
543
544/* Expand the number of tokens in a list. */
545static void
546expand_token_space (list)
547 cpp_toklist *list;
548{
549 list->tokens_cap *= 2;
15dad1d9
ZW
550 if (list->flags & LIST_OFFSET)
551 list->tokens--;
1368ee70 552 list->tokens = (cpp_token *)
15dad1d9
ZW
553 xrealloc (list->tokens, (list->tokens_cap + 1) * sizeof (cpp_token));
554 if (list->flags & LIST_OFFSET)
555 list->tokens++; /* Skip the dummy. */
1368ee70
ZW
556}
557
c5a04734
ZW
558/* Initialize a token list. We allocate an extra token in front of
559 the token list, as this allows us to always peek at the previous
560 token without worrying about underflowing the list. */
15dad1d9
ZW
561void
562_cpp_init_toklist (list)
1368ee70 563 cpp_toklist *list;
1368ee70 564{
15dad1d9
ZW
565 /* Initialize token space. Put a dummy token before the start
566 that will fail matches. */
567 list->tokens_cap = 256; /* 4K's worth. */
568 list->tokens = (cpp_token *)
569 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
570 list->tokens[0].type = CPP_EOF;
571 list->tokens++;
572
573 /* Initialize name space. */
574 list->name_cap = 1024;
575 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
576
577 /* Only create a comment space on demand. */
578 list->comments_cap = 0;
579 list->comments = 0;
580
581 list->flags = LIST_OFFSET;
582 _cpp_clear_toklist (list);
583}
1368ee70 584
15dad1d9
ZW
585/* Clear a token list. */
586void
587_cpp_clear_toklist (list)
588 cpp_toklist *list;
589{
c5a04734
ZW
590 list->tokens_used = 0;
591 list->name_used = 0;
592 list->comments_used = 0;
15dad1d9
ZW
593 list->dirno = -1;
594 list->flags &= LIST_OFFSET; /* clear all but that one */
595}
596
597/* Free a token list. Does not free the list itself, which may be
598 embedded in a larger structure. */
599void
600_cpp_free_toklist (list)
601 cpp_toklist *list;
602{
603 if (list->comments)
604 free (list->comments);
605 if (list->flags & LIST_OFFSET)
606 free (list->tokens - 1); /* Backup over dummy token. */
607 else
608 free (list->tokens);
609 free (list->namebuf);
1368ee70
ZW
610}
611
15dad1d9
ZW
612/* Slice a token list: copy the sublist [START, FINISH) into COPY.
613 COPY is assumed not to be initialized. The comment space is not
614 copied. */
615void
616_cpp_slice_toklist (copy, start, finish)
617 cpp_toklist *copy;
618 const cpp_token *start, *finish;
619{
620 unsigned int i, n;
621 size_t bytes;
622
623 n = finish - start;
624 copy->tokens_cap = n;
625 copy->tokens = (cpp_token *) xmalloc (n * sizeof (cpp_token));
626 memcpy (copy->tokens, start, n * sizeof (cpp_token));
627
628 bytes = 0;
629 for (i = 0; i < n; i++)
630 if (token_spellings[start[i].type].type > SPELL_NONE)
631 bytes += start[i].val.name.len;
632
633 copy->namebuf = xmalloc (bytes);
634 bytes = 0;
635 for (i = 0; i < n; i++)
636 if (token_spellings[start[i].type].type > SPELL_NONE)
637 {
638 memcpy (copy->namebuf + bytes,
639 start[i].val.name.text, start[i].val.name.len);
640 copy->tokens[i].val.name.text = copy->namebuf + bytes;
641 bytes += start[i].val.name.len;
642 }
643
644 copy->tokens_cap = n;
645 copy->tokens_used = n;
646 copy->name_used = bytes;
647 copy->name_cap = bytes;
648 copy->comments = 0;
649 copy->comments_cap = 0;
650 copy->comments_used = 0;
651
652 copy->flags = 0;
653 copy->dirno = -1;
654}
1368ee70 655
15dad1d9 656/* Shrink a token list down to the minimum size. */
1368ee70 657void
15dad1d9
ZW
658_cpp_squeeze_toklist (list)
659 cpp_toklist *list;
660{
661 long delta;
662 const U_CHAR *old_namebuf;
663
664 if (list->flags & LIST_OFFSET)
665 {
666 list->tokens--;
667 memmove (list->tokens, list->tokens + 1,
668 list->tokens_used * sizeof (cpp_token));
669 list->tokens = xrealloc (list->tokens,
670 list->tokens_used * sizeof (cpp_token));
671 list->flags &= ~LIST_OFFSET;
672 }
673 else
674 list->tokens = xrealloc (list->tokens,
675 list->tokens_used * sizeof (cpp_token));
676 list->tokens_cap = list->tokens_used;
677
678 old_namebuf = list->namebuf;
679 list->namebuf = xrealloc (list->namebuf, list->name_used);
680 list->name_cap = list->name_used;
681
682 /* Fix up token text pointers. */
683 delta = list->namebuf - old_namebuf;
684 if (delta)
685 {
686 unsigned int i;
687
688 for (i = 0; i < list->tokens_used; i++)
689 if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
690 list->tokens[i].val.name.text += delta;
691 }
692
693 if (list->comments_cap)
694 {
695 list->comments = xrealloc (list->comments,
696 list->comments_used * sizeof (cpp_token));
697 list->comments_cap = list->comments_used;
698 }
699}
700
701/* Compare two tokens. */
702int
703_cpp_equiv_tokens (a, b)
704 const cpp_token *a, *b;
705{
706 if (a->type != b->type
707 || a->flags != b->flags
708 || a->aux != b->aux)
709 return 0;
710
711 if (token_spellings[a->type].type > SPELL_NONE)
712 {
713 if (a->val.name.len != b->val.name.len
714 || ustrncmp(a->val.name.text,
715 b->val.name.text,
716 a->val.name.len))
717 return 0;
718 }
719 return 1;
720}
721
722/* Compare two token lists. */
723int
724_cpp_equiv_toklists (a, b)
725 const cpp_toklist *a, *b;
726{
727 unsigned int i;
728
729 if (a->tokens_used != b->tokens_used)
730 return 0;
731
732 for (i = 0; i < a->tokens_used; i++)
733 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
734 return 0;
735 return 1;
736}
737
738/* Scan until we encounter a token of type STOP or a newline, and
739 create a token list for it. Does not macro-expand or execute
740 directives. The final token is not included in the list or
741 consumed from the input. Returns the type of the token stopped at. */
742
743enum cpp_ttype
744_cpp_scan_until (pfile, list, stop)
1368ee70
ZW
745 cpp_reader *pfile;
746 cpp_toklist *list;
15dad1d9 747 enum cpp_ttype stop;
1368ee70
ZW
748{
749 int i, col;
750 long written, len;
751 enum cpp_ttype type;
9e62c811 752 int space_before;
1368ee70 753
15dad1d9
ZW
754 _cpp_clear_toklist (list);
755 list->line = CPP_BUF_LINE (CPP_BUFFER (pfile));
1368ee70
ZW
756
757 written = CPP_WRITTEN (pfile);
758 i = 0;
9e62c811 759 space_before = 0;
1368ee70
ZW
760 for (;;)
761 {
762 col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
763 type = _cpp_lex_token (pfile);
764 len = CPP_WRITTEN (pfile) - written;
765 CPP_SET_WRITTEN (pfile, written);
766 if (type == CPP_HSPACE)
9e62c811
ZW
767 {
768 if (CPP_PEDANTIC (pfile))
769 pedantic_whitespace (pfile, pfile->token_buffer + written, len);
770 space_before = 1;
771 continue;
772 }
0f89df67
ZW
773 else if (type == CPP_COMMENT)
774 /* Only happens when processing -traditional macro definitions.
775 Do not give this a token entry, but do not change space_before
776 either. */
777 continue;
1368ee70
ZW
778
779 if (list->tokens_used >= list->tokens_cap)
780 expand_token_space (list);
781 if (list->name_used + len >= list->name_cap)
bb1ec1d7 782 expand_name_space (list, list->name_used + len + 1 - list->name_cap);
1368ee70 783
9e62c811
ZW
784 if (type == CPP_MACRO)
785 type = CPP_NAME;
786
15dad1d9
ZW
787 if (type == CPP_VSPACE || type == stop)
788 break;
789
1368ee70 790 list->tokens_used++;
1920de47
ZW
791 TOK_TYPE (list, i) = type;
792 TOK_COL (list, i) = col;
a58f64f5 793 TOK_AUX (list, i) = 0;
1920de47 794 TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;
9e62c811 795
1920de47 796 TOK_LEN (list, i) = len;
f617b8e2
NB
797 if (token_spellings[type].type > SPELL_NONE)
798 {
799 memcpy (list->namebuf + list->name_used, CPP_PWRITTEN (pfile), len);
800 TOK_NAME (list, i) = list->namebuf + list->name_used;
801 list->name_used += len;
802 }
803 else
804 TOK_NAME (list, i) = token_spellings[type].spelling;
1368ee70 805 i++;
9e62c811 806 space_before = 0;
1368ee70 807 }
9e62c811 808
15dad1d9 809 /* XXX Temporary kluge: put back the newline (or whatever). */
9e62c811 810 FORWARD(-1);
1368ee70 811
15dad1d9
ZW
812 /* Don't consider the first token to have white before. */
813 TOK_FLAGS (list, 0) &= ~PREV_WHITESPACE;
814 return type;
815}
1368ee70 816
45b966db
ZW
817/* Skip a C-style block comment. We know it's a comment, and point is
818 at the second character of the starter. */
819static void
820skip_block_comment (pfile)
821 cpp_reader *pfile;
822{
3a2b2c7a 823 unsigned int line, col;
61474454 824 const U_CHAR *limit, *cur;
45b966db
ZW
825
826 FORWARD(1);
3a2b2c7a
ZW
827 line = CPP_BUF_LINE (CPP_BUFFER (pfile));
828 col = CPP_BUF_COL (CPP_BUFFER (pfile));
61474454
NB
829 limit = CPP_BUFFER (pfile)->rlimit;
830 cur = CPP_BUFFER (pfile)->cur;
831
832 while (cur < limit)
45b966db 833 {
61474454
NB
834 char c = *cur++;
835 if (c == '\n' || c == '\r')
45b966db
ZW
836 {
837 /* \r cannot be a macro escape marker here. */
838 if (!ACTIVE_MARK_P (pfile))
61474454
NB
839 CPP_BUMP_LINE_CUR (pfile, cur);
840 }
841 else if (c == '*')
842 {
843 /* Check for teminator. */
844 if (cur < limit && *cur == '/')
845 goto out;
846
847 /* Warn about comment starter embedded in comment. */
848 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
849 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
850 cur - CPP_BUFFER (pfile)->line_base,
851 "'/*' within comment");
45b966db 852 }
45b966db 853 }
61474454
NB
854
855 cpp_error_with_line (pfile, line, col, "unterminated comment");
856 cur--;
857 out:
858 CPP_BUFFER (pfile)->cur = cur + 1;
45b966db
ZW
859}
860
861/* Skip a C++/Chill line comment. We know it's a comment, and point
862 is at the second character of the initiator. */
863static void
864skip_line_comment (pfile)
865 cpp_reader *pfile;
866{
867 FORWARD(1);
868 for (;;)
869 {
870 int c = GETC ();
871
872 /* We don't have to worry about EOF in here. */
873 if (c == '\n')
874 {
875 /* Don't consider final '\n' to be part of comment. */
876 FORWARD(-1);
877 return;
878 }
879 else if (c == '\r')
880 {
881 /* \r cannot be a macro escape marker here. */
882 if (!ACTIVE_MARK_P (pfile))
883 CPP_BUMP_LINE (pfile);
ae79697b 884 if (CPP_OPTION (pfile, warn_comments))
45b966db
ZW
885 cpp_warning (pfile, "backslash-newline within line comment");
886 }
887 }
888}
889
890/* Skip a comment - C, C++, or Chill style. M is the first character
891 of the comment marker. If this really is a comment, skip to its
892 end and return ' '. If this is not a comment, return M (which will
893 be '/' or '-'). */
894
895static int
896skip_comment (pfile, m)
897 cpp_reader *pfile;
898 int m;
899{
900 if (m == '/' && PEEKC() == '*')
901 {
902 skip_block_comment (pfile);
903 return ' ';
904 }
905 else if (m == '/' && PEEKC() == '/')
906 {
907 if (CPP_BUFFER (pfile)->system_header_p)
908 {
909 /* We silently allow C++ comments in system headers, irrespective
910 of conformance mode, because lots of busted systems do that
911 and trying to clean it up in fixincludes is a nightmare. */
912 skip_line_comment (pfile);
913 return ' ';
914 }
ae79697b 915 else if (CPP_OPTION (pfile, cplusplus_comments))
45b966db 916 {
0f89df67 917 if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
45b966db 918 {
0f89df67
ZW
919 if (CPP_WTRADITIONAL (pfile))
920 cpp_pedwarn (pfile,
921 "C++ style comments are not allowed in traditional C");
922 else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
923 cpp_pedwarn (pfile,
924 "C++ style comments are not allowed in ISO C89");
925 if (CPP_WTRADITIONAL (pfile)
926 || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
927 cpp_pedwarn (pfile,
45b966db
ZW
928 "(this will be reported only once per input file)");
929 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
930 }
931 skip_line_comment (pfile);
932 return ' ';
933 }
934 else
935 return m;
936 }
937 else if (m == '-' && PEEKC() == '-'
ae79697b 938 && CPP_OPTION (pfile, chill))
45b966db
ZW
939 {
940 skip_line_comment (pfile);
941 return ' ';
942 }
943 else
944 return m;
945}
946
947/* Identical to skip_comment except that it copies the comment into the
948 token_buffer. This is used if !discard_comments. */
949static int
950copy_comment (pfile, m)
951 cpp_reader *pfile;
952 int m;
953{
954 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
955 const U_CHAR *limit;
956
957 if (skip_comment (pfile, m) == m)
958 return m;
959
960 limit = CPP_BUFFER (pfile)->cur;
961 CPP_RESERVE (pfile, limit - start + 2);
962 CPP_PUTC_Q (pfile, m);
963 for (; start <= limit; start++)
964 if (*start != '\r')
965 CPP_PUTC_Q (pfile, *start);
966
967 return ' ';
968}
969
64aaf407
NB
970static void
971null_warning (pfile, count)
972 cpp_reader *pfile;
973 unsigned int count;
974{
975 if (count == 1)
976 cpp_warning (pfile, "embedded null character ignored");
977 else
978 cpp_warning (pfile, "embedded null characters ignored");
979}
980
45b966db
ZW
981/* Skip whitespace \-newline and comments. Does not macro-expand. */
982
983void
984_cpp_skip_hspace (pfile)
985 cpp_reader *pfile;
986{
64aaf407 987 unsigned int null_count = 0;
45b966db 988 int c;
64aaf407 989
45b966db
ZW
990 while (1)
991 {
992 c = GETC();
993 if (c == EOF)
64aaf407 994 goto out;
45b966db
ZW
995 else if (is_hspace(c))
996 {
997 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
998 cpp_pedwarn (pfile, "%s in preprocessing directive",
999 c == '\f' ? "formfeed" : "vertical tab");
64aaf407
NB
1000 else if (c == '\0')
1001 null_count++;
45b966db
ZW
1002 }
1003 else if (c == '\r')
1004 {
1005 /* \r is a backslash-newline marker if !has_escapes, and
1006 a deletable-whitespace or no-reexpansion marker otherwise. */
1007 if (CPP_BUFFER (pfile)->has_escapes)
1008 {
1009 if (PEEKC() == ' ')
1010 FORWARD(1);
1011 else
1012 break;
1013 }
1014 else
1015 CPP_BUMP_LINE (pfile);
1016 }
1017 else if (c == '/' || c == '-')
1018 {
1019 c = skip_comment (pfile, c);
1020 if (c != ' ')
1021 break;
1022 }
1023 else
1024 break;
1025 }
1026 FORWARD(-1);
64aaf407
NB
1027 out:
1028 if (null_count)
1029 null_warning (pfile, null_count);
45b966db
ZW
1030}
1031
1032/* Read and discard the rest of the current line. */
1033
1034void
1035_cpp_skip_rest_of_line (pfile)
1036 cpp_reader *pfile;
1037{
1038 for (;;)
1039 {
1040 int c = GETC();
1041 switch (c)
1042 {
1043 case '\n':
1044 FORWARD(-1);
1045 case EOF:
1046 return;
1047
1048 case '\r':
1049 if (! CPP_BUFFER (pfile)->has_escapes)
1050 CPP_BUMP_LINE (pfile);
1051 break;
1052
1053 case '\'':
1054 case '\"':
1055 skip_string (pfile, c);
1056 break;
1057
1058 case '/':
1059 case '-':
1060 skip_comment (pfile, c);
1061 break;
1062
1063 case '\f':
1064 case '\v':
1065 if (CPP_PEDANTIC (pfile))
1066 cpp_pedwarn (pfile, "%s in preprocessing directive",
1067 c == '\f' ? "formfeed" : "vertical tab");
1068 break;
1069
1070 }
1071 }
1072}
1073
1074/* Parse an identifier starting with C. */
1075
1076void
1077_cpp_parse_name (pfile, c)
1078 cpp_reader *pfile;
1079 int c;
1080{
1081 for (;;)
1082 {
1083 if (! is_idchar(c))
1084 {
1085 FORWARD (-1);
1086 break;
1087 }
1088
1089 if (c == '$' && CPP_PEDANTIC (pfile))
1090 cpp_pedwarn (pfile, "`$' in identifier");
1091
1092 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
1093 CPP_PUTC_Q (pfile, c);
1094 c = GETC();
1095 if (c == EOF)
1096 break;
1097 }
45b966db
ZW
1098 return;
1099}
1100
1101/* Parse and skip over a string starting with C. A single quoted
1102 string is treated like a double -- some programs (e.g., troff) are
1103 perverse this way. (However, a single quoted string is not allowed
1104 to extend over multiple lines.) */
1105static void
1106skip_string (pfile, c)
1107 cpp_reader *pfile;
1108 int c;
1109{
3a2b2c7a 1110 unsigned int start_line, start_column;
64aaf407 1111 unsigned int null_count = 0;
45b966db 1112
3a2b2c7a
ZW
1113 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
1114 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
45b966db
ZW
1115 while (1)
1116 {
1117 int cc = GETC();
1118 switch (cc)
1119 {
1120 case EOF:
1121 cpp_error_with_line (pfile, start_line, start_column,
1122 "unterminated string or character constant");
1123 if (pfile->multiline_string_line != start_line
1124 && pfile->multiline_string_line != 0)
1125 cpp_error_with_line (pfile,
1126 pfile->multiline_string_line, -1,
1127 "possible real start of unterminated constant");
1128 pfile->multiline_string_line = 0;
64aaf407 1129 goto out;
45b966db 1130
64aaf407
NB
1131 case '\0':
1132 null_count++;
1133 break;
1134
45b966db
ZW
1135 case '\n':
1136 CPP_BUMP_LINE (pfile);
1137 /* In Fortran and assembly language, silently terminate
1138 strings of either variety at end of line. This is a
1139 kludge around not knowing where comments are in these
1140 languages. */
ae79697b
ZW
1141 if (CPP_OPTION (pfile, lang_fortran)
1142 || CPP_OPTION (pfile, lang_asm))
45b966db
ZW
1143 {
1144 FORWARD(-1);
64aaf407 1145 goto out;
45b966db
ZW
1146 }
1147 /* Character constants may not extend over multiple lines.
1148 In Standard C, neither may strings. We accept multiline
1149 strings as an extension. */
1150 if (c == '\'')
1151 {
1152 cpp_error_with_line (pfile, start_line, start_column,
1153 "unterminated character constant");
1154 FORWARD(-1);
64aaf407 1155 goto out;
45b966db
ZW
1156 }
1157 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
1158 cpp_pedwarn_with_line (pfile, start_line, start_column,
1159 "string constant runs past end of line");
1160 if (pfile->multiline_string_line == 0)
1161 pfile->multiline_string_line = start_line;
1162 break;
1163
1164 case '\r':
1165 if (CPP_BUFFER (pfile)->has_escapes)
1166 {
1167 cpp_ice (pfile, "\\r escape inside string constant");
1168 FORWARD(1);
1169 }
1170 else
1171 /* Backslash newline is replaced by nothing at all. */
1172 CPP_BUMP_LINE (pfile);
1173 break;
1174
1175 case '\\':
1176 FORWARD(1);
1177 break;
1178
1179 case '\"':
1180 case '\'':
1181 if (cc == c)
64aaf407 1182 goto out;
45b966db
ZW
1183 break;
1184 }
1185 }
64aaf407
NB
1186
1187 out:
1188 if (null_count == 1)
1189 cpp_warning (pfile, "null character in string or character constant");
1190 else if (null_count > 1)
1191 cpp_warning (pfile, "null characters in string or character constant");
45b966db
ZW
1192}
1193
1194/* Parse a string and copy it to the output. */
1195
1196static void
1197parse_string (pfile, c)
1198 cpp_reader *pfile;
1199 int c;
1200{
1201 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
1202 const U_CHAR *limit;
1203
1204 skip_string (pfile, c);
1205
1206 limit = CPP_BUFFER (pfile)->cur;
1207 CPP_RESERVE (pfile, limit - start + 2);
1208 CPP_PUTC_Q (pfile, c);
1209 for (; start < limit; start++)
1210 if (*start != '\r')
1211 CPP_PUTC_Q (pfile, *start);
1212}
1213
45b966db
ZW
1214/* Get the next token, and add it to the text in pfile->token_buffer.
1215 Return the kind of token we got. */
1216
3a2b2c7a 1217enum cpp_ttype
45b966db
ZW
1218_cpp_lex_token (pfile)
1219 cpp_reader *pfile;
1220{
5eec0563 1221 register int c, c2;
3a2b2c7a 1222 enum cpp_ttype token;
45b966db 1223
f2d5f0cc
ZW
1224 if (CPP_BUFFER (pfile) == NULL)
1225 return CPP_EOF;
1226
45b966db
ZW
1227 get_next:
1228 c = GETC();
1229 switch (c)
1230 {
1231 case EOF:
1232 return CPP_EOF;
1233
1234 case '/':
1235 if (PEEKC () == '=')
1236 goto op2;
1237
1238 comment:
ae79697b 1239 if (CPP_OPTION (pfile, discard_comments))
45b966db
ZW
1240 c = skip_comment (pfile, c);
1241 else
1242 c = copy_comment (pfile, c);
1243 if (c != ' ')
1244 goto randomchar;
1245
1246 /* Comments are equivalent to spaces.
1247 For -traditional, a comment is equivalent to nothing. */
ff2b53ef 1248 if (!CPP_OPTION (pfile, discard_comments))
45b966db 1249 return CPP_COMMENT;
9e62c811 1250 else if (CPP_TRADITIONAL (pfile))
15dad1d9 1251 goto get_next;
45b966db
ZW
1252 else
1253 {
1254 CPP_PUTC (pfile, c);
1255 return CPP_HSPACE;
1256 }
1257
1258 case '#':
5eec0563
JM
1259 CPP_PUTC (pfile, c);
1260
1261 hash:
15dad1d9
ZW
1262 c2 = PEEKC ();
1263 if (c2 == '#')
45b966db 1264 {
15dad1d9
ZW
1265 FORWARD (1);
1266 CPP_PUTC (pfile, c2);
1267 return CPP_PASTE;
45b966db 1268 }
15dad1d9 1269 else if (c2 == '%' && PEEKN (1) == ':')
45b966db 1270 {
15dad1d9
ZW
1271 /* Digraph: "%:" == "#". */
1272 FORWARD (1);
1273 CPP_RESERVE (pfile, 2);
1274 CPP_PUTC_Q (pfile, c2);
1275 CPP_PUTC_Q (pfile, GETC ());
1368ee70 1276 return CPP_PASTE;
45b966db 1277 }
15dad1d9
ZW
1278 else
1279 return CPP_HASH;
45b966db
ZW
1280
1281 case '\"':
1282 case '\'':
1283 parse_string (pfile, c);
45b966db
ZW
1284 return c == '\'' ? CPP_CHAR : CPP_STRING;
1285
1286 case '$':
ae79697b 1287 if (!CPP_OPTION (pfile, dollars_in_ident))
45b966db
ZW
1288 goto randomchar;
1289 goto letter;
1290
1291 case ':':
5eec0563
JM
1292 c2 = PEEKC ();
1293 /* Digraph: ":>" == "]". */
1294 if (c2 == '>'
1295 || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
45b966db
ZW
1296 goto op2;
1297 goto randomchar;
1298
1299 case '&':
1300 case '+':
1301 case '|':
1302 c2 = PEEKC ();
1303 if (c2 == c || c2 == '=')
1304 goto op2;
1305 goto randomchar;
1306
5eec0563
JM
1307 case '%':
1308 /* Digraphs: "%:" == "#", "%>" == "}". */
1309 c2 = PEEKC ();
1310 if (c2 == ':')
1311 {
1312 FORWARD (1);
1313 CPP_RESERVE (pfile, 2);
1314 CPP_PUTC_Q (pfile, c);
1315 CPP_PUTC_Q (pfile, c2);
1316 goto hash;
1317 }
1318 else if (c2 == '>')
1319 {
1320 FORWARD (1);
1321 CPP_RESERVE (pfile, 2);
1322 CPP_PUTC_Q (pfile, c);
1323 CPP_PUTC_Q (pfile, c2);
1368ee70 1324 return CPP_OPEN_BRACE;
5eec0563
JM
1325 }
1326 /* else fall through */
1327
45b966db
ZW
1328 case '*':
1329 case '!':
45b966db
ZW
1330 case '=':
1331 case '^':
1332 if (PEEKC () == '=')
1333 goto op2;
1334 goto randomchar;
1335
1336 case '-':
1337 c2 = PEEKC ();
1338 if (c2 == '-')
1339 {
ae79697b 1340 if (CPP_OPTION (pfile, chill))
45b966db
ZW
1341 goto comment; /* Chill style comment */
1342 else
1343 goto op2;
1344 }
1345 else if (c2 == '=')
1346 goto op2;
1347 else if (c2 == '>')
1348 {
ae79697b 1349 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
45b966db
ZW
1350 {
1351 /* In C++, there's a ->* operator. */
1352 token = CPP_OTHER;
45b966db
ZW
1353 CPP_RESERVE (pfile, 4);
1354 CPP_PUTC_Q (pfile, c);
1355 CPP_PUTC_Q (pfile, GETC ());
1356 CPP_PUTC_Q (pfile, GETC ());
45b966db
ZW
1357 return token;
1358 }
1359 goto op2;
1360 }
1361 goto randomchar;
1362
1363 case '<':
1364 if (pfile->parsing_include_directive)
1365 {
1366 for (;;)
1367 {
1368 CPP_PUTC (pfile, c);
1369 if (c == '>')
1370 break;
1371 c = GETC ();
1372 if (c == '\n' || c == EOF)
1373 {
1374 cpp_error (pfile,
1375 "missing '>' in `#include <FILENAME>'");
1376 break;
1377 }
1378 else if (c == '\r')
1379 {
1380 if (!CPP_BUFFER (pfile)->has_escapes)
1381 {
1382 /* Backslash newline is replaced by nothing. */
1383 CPP_ADJUST_WRITTEN (pfile, -1);
1384 CPP_BUMP_LINE (pfile);
1385 }
1386 else
1387 {
1388 /* We might conceivably get \r- or \r<space> in
1389 here. Just delete 'em. */
1390 int d = GETC();
1391 if (d != '-' && d != ' ')
1392 cpp_ice (pfile, "unrecognized escape \\r%c", d);
1393 CPP_ADJUST_WRITTEN (pfile, -1);
1394 }
1395 }
1396 }
1397 return CPP_STRING;
1398 }
5eec0563
JM
1399 /* Digraphs: "<%" == "{", "<:" == "[". */
1400 c2 = PEEKC ();
1401 if (c2 == '%')
1402 {
1403 FORWARD (1);
1404 CPP_RESERVE (pfile, 2);
1405 CPP_PUTC_Q (pfile, c);
1406 CPP_PUTC_Q (pfile, c2);
1368ee70 1407 return CPP_CLOSE_BRACE;
5eec0563
JM
1408 }
1409 else if (c2 == ':')
1410 goto op2;
45b966db
ZW
1411 /* else fall through */
1412 case '>':
1413 c2 = PEEKC ();
1414 if (c2 == '=')
1415 goto op2;
1416 /* GNU C++ supports MIN and MAX operators <? and >?. */
ae79697b 1417 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
45b966db
ZW
1418 goto randomchar;
1419 FORWARD(1);
5eec0563
JM
1420 CPP_RESERVE (pfile, 3);
1421 CPP_PUTC_Q (pfile, c);
1422 CPP_PUTC_Q (pfile, c2);
1423 if (PEEKC () == '=')
45b966db 1424 CPP_PUTC_Q (pfile, GETC ());
45b966db
ZW
1425 return CPP_OTHER;
1426
1427 case '.':
1428 c2 = PEEKC ();
5eec0563 1429 if (ISDIGIT (c2))
45b966db 1430 {
5eec0563 1431 CPP_PUTC (pfile, c);
45b966db
ZW
1432 c = GETC ();
1433 goto number;
1434 }
1435
1436 /* In C++ there's a .* operator. */
ae79697b 1437 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
45b966db
ZW
1438 goto op2;
1439
1440 if (c2 == '.' && PEEKN(1) == '.')
1441 {
5eec0563 1442 CPP_RESERVE (pfile, 3);
45b966db
ZW
1443 CPP_PUTC_Q (pfile, '.');
1444 CPP_PUTC_Q (pfile, '.');
1445 CPP_PUTC_Q (pfile, '.');
1446 FORWARD (2);
1368ee70 1447 return CPP_ELLIPSIS;
45b966db
ZW
1448 }
1449 goto randomchar;
1450
1451 op2:
5eec0563 1452 CPP_RESERVE (pfile, 2);
45b966db
ZW
1453 CPP_PUTC_Q (pfile, c);
1454 CPP_PUTC_Q (pfile, GETC ());
5eec0563 1455 return CPP_OTHER;
45b966db
ZW
1456
1457 case 'L':
1458 c2 = PEEKC ();
1459 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1460 {
1461 CPP_PUTC (pfile, c);
1462 c = GETC ();
1463 parse_string (pfile, c);
45b966db
ZW
1464 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1465 }
1466 goto letter;
1467
1468 case '0': case '1': case '2': case '3': case '4':
1469 case '5': case '6': case '7': case '8': case '9':
1470 number:
1471 c2 = '.';
1472 for (;;)
1473 {
1474 CPP_RESERVE (pfile, 2);
1475 CPP_PUTC_Q (pfile, c);
1476 c = PEEKC ();
1477 if (c == EOF)
1478 break;
1479 if (!is_numchar(c) && c != '.'
1480 && ((c2 != 'e' && c2 != 'E'
1481 && ((c2 != 'p' && c2 != 'P')
ae79697b 1482 || CPP_OPTION (pfile, c89)))
45b966db
ZW
1483 || (c != '+' && c != '-')))
1484 break;
1485 FORWARD(1);
1486 c2= c;
1487 }
45b966db
ZW
1488 return CPP_NUMBER;
1489 case 'b': case 'c': case 'd': case 'h': case 'o':
1490 case 'B': case 'C': case 'D': case 'H': case 'O':
ae79697b 1491 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
45b966db 1492 {
45b966db
ZW
1493 CPP_RESERVE (pfile, 2);
1494 CPP_PUTC_Q (pfile, c);
1495 CPP_PUTC_Q (pfile, '\'');
1496 FORWARD(1);
1497 for (;;)
1498 {
1499 c = GETC();
1500 if (c == EOF)
1501 goto chill_number_eof;
1502 if (!is_numchar(c))
1503 break;
1504 CPP_PUTC (pfile, c);
1505 }
1506 if (c == '\'')
1507 {
1508 CPP_RESERVE (pfile, 2);
1509 CPP_PUTC_Q (pfile, c);
45b966db
ZW
1510 return CPP_STRING;
1511 }
1512 else
1513 {
1514 FORWARD(-1);
1515 chill_number_eof:
45b966db
ZW
1516 return CPP_NUMBER;
1517 }
1518 }
1519 else
1520 goto letter;
1521 case '_':
1522 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1523 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1524 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1525 case 'x': case 'y': case 'z':
1526 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1527 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1528 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1529 case 'Y': case 'Z':
1530 letter:
45b966db
ZW
1531 _cpp_parse_name (pfile, c);
1532 return CPP_MACRO;
1533
64aaf407
NB
1534 case ' ': case '\t': case '\v': case '\f': case '\0':
1535 {
1536 int null_count = 0;
1537
1538 for (;;)
1539 {
1540 if (c == '\0')
1541 null_count++;
1542 else
1543 CPP_PUTC (pfile, c);
1544 c = PEEKC ();
1545 if (c == EOF || !is_hspace(c))
1546 break;
1547 FORWARD(1);
1548 }
1549 if (null_count)
1550 null_warning (pfile, null_count);
1551 return CPP_HSPACE;
1552 }
45b966db
ZW
1553
1554 case '\r':
1555 if (CPP_BUFFER (pfile)->has_escapes)
1556 {
1557 c = GETC ();
1558 if (c == '-')
1559 {
1560 if (pfile->output_escapes)
1561 CPP_PUTS (pfile, "\r-", 2);
1562 _cpp_parse_name (pfile, GETC ());
1563 return CPP_NAME;
1564 }
1565 else if (c == ' ')
1566 {
ff2b53ef
ZW
1567 /* "\r " means a space, but only if necessary to prevent
1568 accidental token concatenation. */
45b966db
ZW
1569 CPP_RESERVE (pfile, 2);
1570 if (pfile->output_escapes)
1571 CPP_PUTC_Q (pfile, '\r');
1572 CPP_PUTC_Q (pfile, c);
1573 return CPP_HSPACE;
1574 }
1575 else
1576 {
1577 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1578 goto get_next;
1579 }
1580 }
1581 else
1582 {
1583 /* Backslash newline is ignored. */
cbccf5e8
MM
1584 if (!ACTIVE_MARK_P (pfile))
1585 CPP_BUMP_LINE (pfile);
45b966db
ZW
1586 goto get_next;
1587 }
1588
1589 case '\n':
1590 CPP_PUTC (pfile, c);
45b966db
ZW
1591 return CPP_VSPACE;
1592
1368ee70
ZW
1593 case '(': token = CPP_OPEN_PAREN; goto char1;
1594 case ')': token = CPP_CLOSE_PAREN; goto char1;
1595 case '{': token = CPP_OPEN_BRACE; goto char1;
1596 case '}': token = CPP_CLOSE_BRACE; goto char1;
1597 case ',': token = CPP_COMMA; goto char1;
1598 case ';': token = CPP_SEMICOLON; goto char1;
45b966db
ZW
1599
1600 randomchar:
1601 default:
1602 token = CPP_OTHER;
1603 char1:
45b966db
ZW
1604 CPP_PUTC (pfile, c);
1605 return token;
1606 }
1607}
1608
1609/* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1610 Caller is expected to have checked no_macro_expand. */
1611static int
1612maybe_macroexpand (pfile, written)
1613 cpp_reader *pfile;
1614 long written;
1615{
1616 U_CHAR *macro = pfile->token_buffer + written;
1617 size_t len = CPP_WRITTEN (pfile) - written;
1618 HASHNODE *hp = _cpp_lookup (pfile, macro, len);
1619
a7abcbbf
ZW
1620 /* _cpp_lookup never returns null. */
1621 if (hp->type == T_VOID)
45b966db 1622 return 0;
d9e0bd53 1623 if (hp->disabled || hp->type == T_IDENTITY)
45b966db
ZW
1624 {
1625 if (pfile->output_escapes)
1626 {
1627 /* Insert a no-reexpand marker before IDENT. */
1628 CPP_RESERVE (pfile, 2);
1629 CPP_ADJUST_WRITTEN (pfile, 2);
1630 macro = pfile->token_buffer + written;
1631
1632 memmove (macro + 2, macro, len);
1633 macro[0] = '\r';
1634 macro[1] = '-';
1635 }
1636 return 0;
1637 }
ff2b53ef
ZW
1638 if (hp->type == T_EMPTY)
1639 {
1640 /* Special case optimization: macro expands to nothing. */
1641 CPP_SET_WRITTEN (pfile, written);
1642 CPP_PUTC_Q (pfile, ' ');
1643 return 1;
1644 }
45b966db
ZW
1645
1646 /* If macro wants an arglist, verify that a '(' follows. */
d9e0bd53 1647 if (hp->type == T_FMACRO)
45b966db
ZW
1648 {
1649 int macbuf_whitespace = 0;
1650 int c;
1651
1652 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1653 {
1654 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1655 for (;;)
1656 {
1657 _cpp_skip_hspace (pfile);
1658 c = PEEKC ();
1659 if (c == '\n')
1660 FORWARD(1);
1661 else
1662 break;
1663 }
1664 if (point != CPP_BUFFER (pfile)->cur)
1665 macbuf_whitespace = 1;
1666 if (c == '(')
1667 goto is_macro_call;
1668 else if (c != EOF)
1669 goto not_macro_call;
1670 cpp_pop_buffer (pfile);
1671 }
1672
1673 CPP_SET_MARK (pfile);
1674 for (;;)
1675 {
1676 _cpp_skip_hspace (pfile);
1677 c = PEEKC ();
1678 if (c == '\n')
1679 FORWARD(1);
1680 else
1681 break;
1682 }
1683 CPP_GOTO_MARK (pfile);
1684
1685 if (c != '(')
1686 {
1687 not_macro_call:
1688 if (macbuf_whitespace)
1689 CPP_PUTC (pfile, ' ');
476f2869
ZW
1690
1691 /* K+R treated this as a hard error. */
1692 if (CPP_OPTION (pfile, warn_traditional))
1693 cpp_warning (pfile,
1694 "traditional C rejects function macro %s in non-function context",
1695 hp->name);
45b966db
ZW
1696 return 0;
1697 }
1698 }
1699
1700 is_macro_call:
1701 /* This is now known to be a macro call.
1702 Expand the macro, reading arguments as needed,
1703 and push the expansion on the input stack. */
1704 _cpp_macroexpand (pfile, hp);
1705 CPP_SET_WRITTEN (pfile, written);
1706 return 1;
1707}
1708
9e62c811
ZW
1709/* Complain about \v or \f in a preprocessing directive (constraint
1710 violation, C99 6.10 para 5). Caller has checked CPP_PEDANTIC. */
1711static void
1712pedantic_whitespace (pfile, p, len)
1713 cpp_reader *pfile;
1714 U_CHAR *p;
1715 unsigned int len;
1716{
1717 while (len)
1718 {
1719 if (*p == '\v')
1720 cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1721 else if (*p == '\f')
1722 cpp_pedwarn (pfile, "form feed in preprocessing directive");
1723 p++;
1724 len--;
1725 }
1726}
1727
1728
3a2b2c7a 1729enum cpp_ttype
45b966db
ZW
1730cpp_get_token (pfile)
1731 cpp_reader *pfile;
1732{
3a2b2c7a 1733 enum cpp_ttype token;
45b966db
ZW
1734 long written = CPP_WRITTEN (pfile);
1735
1736 get_next:
1737 token = _cpp_lex_token (pfile);
1738
1739 switch (token)
1740 {
1741 default:
ff2b53ef
ZW
1742 pfile->potential_control_macro = 0;
1743 pfile->only_seen_white = 0;
1744 return token;
1745
1746 case CPP_VSPACE:
1747 if (pfile->only_seen_white == 0)
1748 pfile->only_seen_white = 1;
1749 CPP_BUMP_LINE (pfile);
ff2b53ef
ZW
1750 return token;
1751
1752 case CPP_HSPACE:
1753 case CPP_COMMENT:
45b966db
ZW
1754 return token;
1755
15dad1d9 1756 case CPP_HASH:
ff2b53ef 1757 pfile->potential_control_macro = 0;
15dad1d9
ZW
1758 if (!pfile->only_seen_white)
1759 return CPP_HASH;
1760 /* XXX shouldn't have to do this - remove the hash or %: from
1761 the token buffer. */
1762 if (CPP_PWRITTEN (pfile)[-1] == '#')
1763 CPP_ADJUST_WRITTEN (pfile, -1);
1764 else
1765 CPP_ADJUST_WRITTEN (pfile, -2);
1766
45b966db 1767 if (_cpp_handle_directive (pfile))
15dad1d9 1768 return CPP_DIRECTIVE;
45b966db
ZW
1769 pfile->only_seen_white = 0;
1770 CPP_PUTC (pfile, '#');
15dad1d9 1771 return CPP_HASH;
45b966db
ZW
1772
1773 case CPP_MACRO:
ff2b53ef
ZW
1774 pfile->potential_control_macro = 0;
1775 pfile->only_seen_white = 0;
45b966db
ZW
1776 if (! pfile->no_macro_expand
1777 && maybe_macroexpand (pfile, written))
1778 goto get_next;
1779 return CPP_NAME;
1780
1781 case CPP_EOF:
f2d5f0cc
ZW
1782 if (CPP_BUFFER (pfile) == NULL)
1783 return CPP_EOF;
c56c2073 1784 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
f2d5f0cc 1785 {
45b966db
ZW
1786 cpp_pop_buffer (pfile);
1787 goto get_next;
1788 }
c56c2073
ZW
1789 cpp_pop_buffer (pfile);
1790 return CPP_EOF;
45b966db
ZW
1791 }
1792}
1793
1794/* Like cpp_get_token, but skip spaces and comments. */
1795
3a2b2c7a 1796enum cpp_ttype
45b966db
ZW
1797cpp_get_non_space_token (pfile)
1798 cpp_reader *pfile;
1799{
1800 int old_written = CPP_WRITTEN (pfile);
1801 for (;;)
1802 {
3a2b2c7a 1803 enum cpp_ttype token = cpp_get_token (pfile);
ff2b53ef 1804 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
45b966db
ZW
1805 return token;
1806 CPP_SET_WRITTEN (pfile, old_written);
1807 }
1808}
1809
ff2b53ef 1810/* Like cpp_get_token, except that it does not execute directives,
c56c2073 1811 does not consume vertical space, and discards horizontal space. */
3a2b2c7a 1812enum cpp_ttype
9e62c811 1813_cpp_get_directive_token (pfile)
45b966db
ZW
1814 cpp_reader *pfile;
1815{
ff2b53ef 1816 long old_written;
3a2b2c7a 1817 enum cpp_ttype token;
57c578a6 1818 int at_bol;
45b966db 1819
ff2b53ef 1820 get_next:
57c578a6 1821 at_bol = (CPP_BUFFER (pfile)->cur == CPP_BUFFER (pfile)->line_base);
ff2b53ef
ZW
1822 old_written = CPP_WRITTEN (pfile);
1823 token = _cpp_lex_token (pfile);
1824 switch (token)
45b966db 1825 {
ff2b53ef
ZW
1826 default:
1827 return token;
45b966db 1828
ff2b53ef
ZW
1829 case CPP_VSPACE:
1830 /* Put it back and return VSPACE. */
1831 FORWARD(-1);
1832 CPP_ADJUST_WRITTEN (pfile, -1);
1833 return CPP_VSPACE;
45b966db 1834
ff2b53ef 1835 case CPP_HSPACE:
57c578a6
ZW
1836 /* The purpose of this rather strange check is to prevent pedantic
1837 warnings for ^L in an #ifdefed out block. */
1838 if (CPP_PEDANTIC (pfile) && ! at_bol)
9e62c811
ZW
1839 pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1840 CPP_WRITTEN (pfile) - old_written);
1841 CPP_SET_WRITTEN (pfile, old_written);
1842 goto get_next;
ff2b53ef 1843 return CPP_HSPACE;
45b966db 1844
ff2b53ef
ZW
1845 case CPP_MACRO:
1846 if (! pfile->no_macro_expand
1847 && maybe_macroexpand (pfile, old_written))
1848 goto get_next;
1849 return CPP_NAME;
45b966db 1850
ff2b53ef
ZW
1851 case CPP_EOF:
1852 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
45b966db 1853 {
ff2b53ef
ZW
1854 cpp_pop_buffer (pfile);
1855 goto get_next;
45b966db 1856 }
ff2b53ef
ZW
1857 else
1858 /* This can happen for files that don't end with a newline,
1859 and for cpp_define and friends. Pretend they do, so
1860 callers don't have to deal. A warning will be issued by
1861 someone else, if necessary. */
1862 return CPP_VSPACE;
1863 }
1864}
1865
45b966db
ZW
1866/* Determine the current line and column. Used only by read_and_prescan. */
1867static U_CHAR *
1868find_position (start, limit, linep)
1869 U_CHAR *start;
1870 U_CHAR *limit;
1871 unsigned long *linep;
1872{
1873 unsigned long line = *linep;
1874 U_CHAR *lbase = start;
1875 while (start < limit)
1876 {
1877 U_CHAR ch = *start++;
1878 if (ch == '\n' || ch == '\r')
1879 {
1880 line++;
1881 lbase = start;
1882 }
1883 }
1884 *linep = line;
1885 return lbase;
1886}
1887
2a87fbe8
ZW
1888/* The following table is used by _cpp_read_and_prescan. If we have
1889 designated initializers, it can be constant data; otherwise, it is
1890 set up at runtime by _cpp_init_input_buffer. */
46d07497
ZW
1891
1892#ifndef UCHAR_MAX
1893#define UCHAR_MAX 255 /* assume 8-bit bytes */
1894#endif
1895
12cf91fe 1896#if (GCC_VERSION >= 2007)
2a87fbe8 1897#define init_chartab() /* nothing */
12cf91fe 1898#define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
46d07497
ZW
1899#define END };
1900#define s(p, v) [p] = v,
1901#else
12cf91fe 1902#define CHARTAB static U_CHAR chartab[UCHAR_MAX + 1] = { 0 }; \
2a87fbe8
ZW
1903 static void init_chartab PARAMS ((void)) { \
1904 unsigned char *x = chartab;
46d07497
ZW
1905#define END }
1906#define s(p, v) x[p] = v;
1907#endif
1908
1909/* Table of characters that can't be handled in the inner loop.
2a87fbe8
ZW
1910 Also contains the mapping between trigraph third characters and their
1911 replacements. */
46d07497
ZW
1912#define SPECCASE_CR 1
1913#define SPECCASE_BACKSLASH 2
1914#define SPECCASE_QUESTION 3
1915
2a87fbe8 1916CHARTAB
46d07497
ZW
1917 s('\r', SPECCASE_CR)
1918 s('\\', SPECCASE_BACKSLASH)
1919 s('?', SPECCASE_QUESTION)
46d07497 1920
46d07497
ZW
1921 s('=', '#') s(')', ']') s('!', '|')
1922 s('(', '[') s('\'', '^') s('>', '}')
1923 s('/', '\\') s('<', '{') s('-', '~')
1924END
1925
1926#undef CHARTAB
46d07497
ZW
1927#undef END
1928#undef s
1929
2a87fbe8
ZW
1930#define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
1931#define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1932
45b966db
ZW
1933/* Read the entire contents of file DESC into buffer BUF. LEN is how
1934 much memory to allocate initially; more will be allocated if
1935 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1936 canonical form (\n). If enabled, convert and/or warn about
1937 trigraphs. Convert backslash-newline to a one-character escape
1938 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1939 token). If there is no newline at the end of the file, add one and
1940 warn. Returns -1 on failure, or the actual length of the data to
1941 be scanned.
1942
1943 This function does a lot of work, and can be a serious performance
1944 bottleneck. It has been tuned heavily; make sure you understand it
1945 before hacking. The common case - no trigraphs, Unix style line
1946 breaks, backslash-newline set off by whitespace, newline at EOF -
1947 has been optimized at the expense of the others. The performance
1948 penalty for DOS style line breaks (\r\n) is about 15%.
1949
1950 Warnings lose particularly heavily since we have to determine the
1951 line number, which involves scanning from the beginning of the file
1952 or from the last warning. The penalty for the absence of a newline
1953 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1954
1955 If your file has more than one kind of end-of-line marker, you
04e3ec78
NB
1956 will get messed-up line numbering.
1957
1958 So that the cases of the switch statement do not have to concern
1959 themselves with the complications of reading beyond the end of the
1960 buffer, the buffer is guaranteed to have at least 3 characters in
1961 it (or however many are left in the file, if less) on entry to the
1962 switch. This is enough to handle trigraphs and the "\\\n\r" and
1963 "\\\r\n" cases.
1964
1965 The end of the buffer is marked by a '\\', which, being a special
1966 character, guarantees we will exit the fast-scan loops and perform
1967 a refill. */
46d07497 1968
45b966db
ZW
1969long
1970_cpp_read_and_prescan (pfile, fp, desc, len)
1971 cpp_reader *pfile;
1972 cpp_buffer *fp;
1973 int desc;
1974 size_t len;
1975{
1976 U_CHAR *buf = (U_CHAR *) xmalloc (len);
1977 U_CHAR *ip, *op, *line_base;
1978 U_CHAR *ibase;
45b966db
ZW
1979 unsigned long line;
1980 unsigned int deferred_newlines;
45b966db 1981 size_t offset;
04e3ec78 1982 int count = 0;
45b966db
ZW
1983
1984 offset = 0;
04e3ec78 1985 deferred_newlines = 0;
45b966db
ZW
1986 op = buf;
1987 line_base = buf;
1988 line = 1;
04e3ec78
NB
1989 ibase = pfile->input_buffer + 3;
1990 ip = ibase;
1991 ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
45b966db
ZW
1992
1993 for (;;)
1994 {
04e3ec78
NB
1995 U_CHAR *near_buff_end;
1996
04e3ec78 1997 count = read (desc, ibase, pfile->input_buffer_len);
45b966db
ZW
1998 if (count < 0)
1999 goto error;
04e3ec78
NB
2000
2001 ibase[count] = '\\'; /* Marks end of buffer */
2002 if (count)
45b966db 2003 {
04e3ec78
NB
2004 near_buff_end = pfile->input_buffer + count;
2005 offset += count;
45b966db 2006 if (offset > len)
04e3ec78
NB
2007 {
2008 size_t delta_op;
2009 size_t delta_line_base;
1b955cba 2010 len = offset * 2;
04e3ec78
NB
2011 if (offset > len)
2012 /* len overflowed.
2013 This could happen if the file is larger than half the
2014 maximum address space of the machine. */
2015 goto too_big;
2016
2017 delta_op = op - buf;
2018 delta_line_base = line_base - buf;
2019 buf = (U_CHAR *) xrealloc (buf, len);
2020 op = buf + delta_op;
2021 line_base = buf + delta_line_base;
2022 }
2023 }
2024 else
2025 {
2026 if (ip == ibase)
2027 break;
2028 /* Allow normal processing of the (at most 2) remaining
2029 characters. The end-of-buffer marker is still present
2030 and prevents false matches within the switch. */
2031 near_buff_end = ibase - 1;
45b966db
ZW
2032 }
2033
2034 for (;;)
2035 {
04e3ec78 2036 unsigned int span;
45b966db 2037
04e3ec78 2038 /* Deal with \-newline, potentially in the middle of a token. */
45b966db
ZW
2039 if (deferred_newlines)
2040 {
2a87fbe8 2041 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
04e3ec78
NB
2042 {
2043 /* Previous was not white space. Skip to white
2044 space, if we can, before outputting the \r's */
2045 span = 0;
2046 while (ip[span] != ' '
2047 && ip[span] != '\t'
2048 && ip[span] != '\n'
2a87fbe8 2049 && NORMAL(ip[span]))
04e3ec78
NB
2050 span++;
2051 memcpy (op, ip, span);
2052 op += span;
2053 ip += span;
2a87fbe8 2054 if (! NORMAL(ip[0]))
04e3ec78
NB
2055 goto do_speccase;
2056 }
2057 while (deferred_newlines)
2058 deferred_newlines--, *op++ = '\r';
45b966db
ZW
2059 }
2060
2061 /* Copy as much as we can without special treatment. */
04e3ec78 2062 span = 0;
2a87fbe8 2063 while (NORMAL (ip[span])) span++;
45b966db
ZW
2064 memcpy (op, ip, span);
2065 op += span;
2066 ip += span;
2067
04e3ec78
NB
2068 do_speccase:
2069 if (ip > near_buff_end) /* Do we have enough chars? */
2070 break;
2a87fbe8 2071 switch (chartab[*ip++])
45b966db 2072 {
45b966db 2073 case SPECCASE_CR: /* \r */
04e3ec78 2074 if (ip[-2] != '\n')
45b966db 2075 {
04e3ec78
NB
2076 if (*ip == '\n')
2077 ip++;
2078 *op++ = '\n';
45b966db 2079 }
45b966db
ZW
2080 break;
2081
2082 case SPECCASE_BACKSLASH: /* \ */
04e3ec78 2083 if (*ip == '\n')
45b966db 2084 {
04e3ec78 2085 deferred_newlines++;
45b966db
ZW
2086 ip++;
2087 if (*ip == '\r') ip++;
45b966db
ZW
2088 }
2089 else if (*ip == '\r')
2090 {
04e3ec78 2091 deferred_newlines++;
45b966db
ZW
2092 ip++;
2093 if (*ip == '\n') ip++;
45b966db
ZW
2094 }
2095 else
2096 *op++ = '\\';
04e3ec78 2097 break;
45b966db
ZW
2098
2099 case SPECCASE_QUESTION: /* ? */
2100 {
2101 unsigned int d, t;
04e3ec78
NB
2102
2103 *op++ = '?'; /* Normal non-trigraph case */
2104 if (ip[0] != '?')
2105 break;
2106
45b966db 2107 d = ip[1];
2a87fbe8
ZW
2108 t = chartab[d];
2109 if (NONTRI (t))
04e3ec78 2110 break;
45b966db 2111
ae79697b 2112 if (CPP_OPTION (pfile, warn_trigraphs))
45b966db
ZW
2113 {
2114 unsigned long col;
2115 line_base = find_position (line_base, op, &line);
2116 col = op - line_base + 1;
ae79697b 2117 if (CPP_OPTION (pfile, trigraphs))
45b966db 2118 cpp_warning_with_line (pfile, line, col,
04e3ec78 2119 "trigraph ??%c converted to %c", d, t);
45b966db
ZW
2120 else
2121 cpp_warning_with_line (pfile, line, col,
04e3ec78 2122 "trigraph ??%c ignored", d);
45b966db 2123 }
04e3ec78
NB
2124
2125 ip += 2;
ae79697b 2126 if (CPP_OPTION (pfile, trigraphs))
45b966db 2127 {
04e3ec78 2128 op[-1] = t; /* Overwrite '?' */
45b966db 2129 if (t == '\\')
04e3ec78
NB
2130 {
2131 op--;
2132 *--ip = '\\';
2133 goto do_speccase; /* May need buffer refill */
2134 }
45b966db
ZW
2135 }
2136 else
2137 {
45b966db
ZW
2138 *op++ = '?';
2139 *op++ = d;
2140 }
2141 }
04e3ec78 2142 break;
45b966db
ZW
2143 }
2144 }
f6fab919
ZW
2145 /* Copy previous char plus unprocessed (at most 2) chars
2146 to beginning of buffer, refill it with another
2147 read(), and continue processing */
2148 memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
2149 ip -= count;
45b966db
ZW
2150 }
2151
2152 if (offset == 0)
2153 return 0;
2154
45b966db
ZW
2155 if (op[-1] != '\n')
2156 {
2157 unsigned long col;
2158 line_base = find_position (line_base, op, &line);
2159 col = op - line_base + 1;
f6fab919 2160 cpp_warning_with_line (pfile, line, col, "no newline at end of file");
45b966db
ZW
2161 if (offset + 1 > len)
2162 {
2163 len += 1;
2164 if (offset + 1 > len)
2165 goto too_big;
2166 buf = (U_CHAR *) xrealloc (buf, len);
2167 op = buf + offset;
2168 }
2169 *op++ = '\n';
2170 }
2171
2172 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
2173 return op - buf;
2174
2175 too_big:
f6fab919
ZW
2176 cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
2177 (unsigned long)offset);
45b966db
ZW
2178 free (buf);
2179 return -1;
2180
2181 error:
2182 cpp_error_from_errno (pfile, fp->ihash->name);
2183 free (buf);
2184 return -1;
2185}
2186
2a87fbe8
ZW
2187/* Allocate pfile->input_buffer, and initialize chartab[]
2188 if it hasn't happened already. */
46d07497 2189
45b966db
ZW
2190void
2191_cpp_init_input_buffer (pfile)
2192 cpp_reader *pfile;
2193{
2194 U_CHAR *tmp;
2195
2a87fbe8 2196 init_chartab ();
15dad1d9 2197 _cpp_init_toklist (&pfile->directbuf);
04e3ec78 2198
45b966db
ZW
2199 /* Determine the appropriate size for the input buffer. Normal C
2200 source files are smaller than eight K. */
04e3ec78
NB
2201 /* 8Kbytes of buffer proper, 1 to detect running off the end without
2202 address arithmetic all the time, and 3 for pushback during buffer
2203 refill, in case there's a potential trigraph or end-of-line
2204 digraph at the end of a block. */
45b966db 2205
04e3ec78 2206 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
45b966db
ZW
2207 pfile->input_buffer = tmp;
2208 pfile->input_buffer_len = 8192;
2209}
c5a04734 2210
6d2c2047
ZW
2211/* Utility routine:
2212 Compares, in the manner of strcmp(3), the token beginning at TOKEN
2213 and extending for LEN characters to the NUL-terminated string
2214 STRING. Typical usage:
2215
2216 if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
2217 "inline"))
2218 { ... }
2219 */
2220
2221int
2222cpp_idcmp (token, len, string)
2223 const U_CHAR *token;
2224 size_t len;
2225 const char *string;
2226{
2227 size_t len2 = strlen (string);
2228 int r;
2229
2230 if ((r = memcmp (token, string, MIN (len, len2))))
2231 return r;
2232
2233 /* The longer of the two strings sorts after the shorter. */
2234 if (len == len2)
2235 return 0;
2236 else if (len < len2)
2237 return -1;
2238 else
2239 return 1;
2240}
2241
b8f41010 2242#ifdef NEW_LEXER
c5a04734 2243
d6d5f795
NB
2244/* Lexing algorithm.
2245
2246 The original lexer in cpplib was made up of two passes: a first pass
2247 that replaced trigraphs and deleted escaped newlines, and a second
2248 pass that tokenized the result of the first pass. Tokenisation was
2249 performed by peeking at the next character in the input stream. For
6777db6d 2250 example, if the input stream contained "!=", the handler for the !
d6d5f795 2251 character would peek at the next character, and if it were a '='
6777db6d
NB
2252 would skip over it, and return a "!=" token, otherwise it would
2253 return just the "!" token.
d6d5f795
NB
2254
2255 To implement a single-pass lexer, this peeking ahead is unworkable.
2256 An arbitrary number of escaped newlines, and trigraphs (in particular
6777db6d
NB
2257 ??/ which translates to the escape \), could separate the '!' and '='
2258 in the input stream, yet the next token is still a "!=".
d6d5f795
NB
2259
2260 Suppose instead that we lex by one logical line at a time, producing
6777db6d
NB
2261 a token list or stack for each logical line, and when seeing the '!'
2262 push a CPP_NOT token on the list. Then if the '!' is part of a
2263 longer token ("!=") we know we must see the remainder of the token by
2264 the time we reach the end of the logical line. Thus we can have the
2265 '=' handler look at the previous token (at the end of the list / top
2266 of the stack) and see if it is a "!" token, and if so, instead of
2267 pushing a "=" token revise the existing token to be a "!=" token.
d6d5f795
NB
2268
2269 This works in the presence of escaped newlines, because the '\' would
2270 have been pushed on the top of the stack as a CPP_BACKSLASH. The
2271 newline ('\n' or '\r') handler looks at the token at the top of the
2272 stack to see if it is a CPP_BACKSLASH, and if so discards both.
2273 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
2274 the '=' handler would never see any intervening escaped newlines.
2275
2276 To make trigraphs work in this context, as in precedence trigraphs
2277 are highest and converted before anything else, the '?' handler does
2278 lookahead to see if it is a trigraph, and if so skips the trigraph
2279 and pushes the token it represents onto the top of the stack. This
2280 also works in the particular case of a CPP_BACKSLASH trigraph.
2281
2282 To the preprocessor, whitespace is only significant to the point of
2283 knowing whether whitespace precedes a particular token. For example,
2284 the '=' handler needs to know whether there was whitespace between it
6777db6d 2285 and a "!" token on the top of the stack, to make the token conversion
d6d5f795
NB
2286 decision correctly. So each token has a PREV_WHITESPACE flag to
2287 indicate this - the standard permits consecutive whitespace to be
2288 regarded as a single space. The compiler front ends are not
2289 interested in whitespace at all; they just require a token stream.
2290 Another place where whitespace is significant to the preprocessor is
2291 a #define statement - if there is whitespace between the macro name
2292 and an initial "(" token the macro is "object-like", otherwise it is
2293 a function-like macro that takes arguments.
2294
2295 However, all is not rosy. Parsing of identifiers, numbers, comments
2296 and strings becomes trickier because of the possibility of raw
2297 trigraphs and escaped newlines in the input stream.
2298
2299 The trigraphs are three consecutive characters beginning with two
c2e25d51
NB
2300 question marks. A question mark is not valid as part of a number or
2301 identifier, so parsing of a number or identifier terminates normally
2302 upon reaching it, returning to the mainloop which handles the
2303 trigraph just like it would in any other position. Similarly for the
2304 backslash of a backslash-newline combination. So we just need the
2305 escaped-newline dropper in the mainloop to check if the token on the
2306 top of the stack after dropping the escaped newline is a number or
2307 identifier, and if so to continue the processing it as if nothing had
2308 happened.
d6d5f795
NB
2309
2310 For strings, we replace trigraphs whenever we reach a quote or
2311 newline, because there might be a backslash trigraph escaping them.
2312 We need to be careful that we start trigraph replacing from where we
2313 left off previously, because it is possible for a first scan to leave
2314 "fake" trigraphs that a second scan would pick up as real (e.g. the
c2e25d51 2315 sequence "????/\n=" would find a fake ??= trigraph after removing the
d6d5f795
NB
2316 escaped newline.)
2317
2318 For line comments, on reaching a newline we scan the previous
2319 character(s) to see if it escaped, and continue if it is. Block
2320 comments ignore everything and just focus on finding the comment
2321 termination mark. The only difficult thing, and it is surprisingly
2322 tricky, is checking if an asterisk precedes the final slash since
2323 they could be separated by escaped newlines. If the preprocessor is
2324 invoked with the output comments option, we don't bother removing
2325 escaped newlines and replacing trigraphs for output.
2326
2327 Finally, numbers can begin with a period, which is pushed initially
2328 as a CPP_DOT token in its own right. The digit handler checks if the
2329 previous token was a CPP_DOT not separated by whitespace, and if so
2330 pops it off the stack and pushes a period into the number's buffer
2331 before calling the number parser.
2332
2333*/
2334
b8f41010
NB
/* Source spellings of the digraphs.  NOTE(review): the element order
   presumably matches whatever indexes this table elsewhere in the
   file - confirm before reordering.  */
2335static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
2336 U":>", U"<%", U"%>"};
/* Indexed by the third character of a "??x" sequence: holds the
   character that trigraph stands for, or zero when "??x" is not a
   trigraph.  The non-zero entries are filled in by
   init_trigraph_map ().  */
2337static unsigned char trigraph_map[256];
c5a04734
ZW
2338
2339static void
2340expand_comment_space (list)
2341 cpp_toklist *list;
2342{
2343 if (list->comments_cap == 0)
2344 {
2345 list->comments_cap = 10;
2346 list->comments = (cpp_token *)
2347 xmalloc (list->comments_cap * sizeof (cpp_token));
2348 }
2349 else
2350 {
2351 list->comments_cap *= 2;
2352 list->comments = (cpp_token *)
2353 xrealloc (list->comments, list->comments_cap);
2354 }
2355}
2356
c5a04734
ZW
2357void
2358init_trigraph_map ()
2359{
2360 trigraph_map['='] = '#';
2361 trigraph_map['('] = '[';
2362 trigraph_map[')'] = ']';
2363 trigraph_map['/'] = '\\';
2364 trigraph_map['\''] = '^';
2365 trigraph_map['<'] = '{';
2366 trigraph_map['>'] = '}';
2367 trigraph_map['!'] = '|';
2368 trigraph_map['-'] = '~';
2369}
2370
2371/* Call when a trigraph is encountered. It warns if necessary, and
2372 returns true if the trigraph should be honoured. END is the third
2373 character of a trigraph in the input stream. */
2374static int
2375trigraph_ok (pfile, end)
2376 cpp_reader *pfile;
2377 const unsigned char *end;
2378{
2379 int accept = CPP_OPTION (pfile, trigraphs);
2380
2381 if (CPP_OPTION (pfile, warn_trigraphs))
2382 {
2383 unsigned int col = end - 1 - pfile->buffer->line_base;
2384 if (accept)
2385 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2386 "trigraph ??%c converted to %c",
2387 (int) *end, (int) trigraph_map[*end]);
2388 else
2389 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2390 "trigraph ??%c ignored", (int) *end);
2391 }
2392 return accept;
2393}
2394
2395/* Scan a string for trigraphs, warning or replacing them inline as
2396 appropriate. When parsing a string, we must call this routine
2397 before processing a newline character (if trigraphs are enabled),
2398 since the newline might be escaped by a preceding backslash
2399 trigraph sequence. Returns a pointer to the end of the name after
2400 replacement. */
2401
2402static unsigned char*
2403trigraph_replace (pfile, src, limit)
2404 cpp_reader *pfile;
2405 unsigned char *src;
2406 unsigned char* limit;
2407{
2408 unsigned char *dest;
2409
2410 /* Starting with src[1], find two consecutive '?'. The case of no
2411 trigraphs is streamlined. */
2412
2413 for (; src + 1 < limit; src += 2)
2414 {
2415 if (src[0] != '?')
2416 continue;
2417
2418 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
2419 if (src[-1] == '?')
2420 src--;
2421 else if (src + 2 == limit || src[1] != '?')
2422 continue;
2423
2424 /* Check if it really is a trigraph. */
2425 if (trigraph_map[src[2]] == 0)
2426 continue;
2427
2428 dest = src;
2429 goto trigraph_found;
2430 }
2431 return limit;
2432
2433 /* Now we have a trigraph, we need to scan the remaining buffer, and
2434 copy-shifting its contents left if replacement is enabled. */
2435 for (; src + 2 < limit; dest++, src++)
2436 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2437 {
2438 trigraph_found:
2439 src += 2;
2440 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2441 *dest = trigraph_map[*src];
2442 }
2443
2444 /* Copy remaining (at most 2) characters. */
2445 while (src < limit)
2446 *dest++ = *src++;
2447 return dest;
2448}
2449
2450/* If CUR is a backslash or the end of a trigraphed backslash, return
2451 a pointer to its beginning, otherwise NULL. We don't read beyond
2452 the buffer start, because there is the start of the comment in the
2453 buffer. */
2454static const unsigned char *
2455backslash_start (pfile, cur)
2456 cpp_reader *pfile;
2457 const unsigned char *cur;
2458{
2459 if (cur[0] == '\\')
2460 return cur;
2461 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2462 && trigraph_ok (pfile, cur))
2463 return cur - 2;
2464 return 0;
2465}
2466
2467/* Skip a C-style block comment. This is probably the trickiest
2468 handler. We find the end of the comment by seeing if an asterisk
2469 is before every '/' we encounter. The nasty complication is that a
2470 previous asterisk may be separated by one or more escaped newlines.
2471 Returns non-zero if comment terminated by EOF, zero otherwise. */
2472static int
b8f41010 2473skip_block_comment2 (pfile)
c5a04734
ZW
2474 cpp_reader *pfile;
2475{
2476 cpp_buffer *buffer = pfile->buffer;
2477 const unsigned char *char_after_star = 0;
2478 register const unsigned char *cur = buffer->cur;
2479 int seen_eof = 0;
2480
2481 /* Inner loop would think the comment has ended if the first comment
2482 character is a '/'. Avoid this and keep the inner loop clean by
2483 skipping such a character. */
2484 if (cur < buffer->rlimit && cur[0] == '/')
2485 cur++;
2486
2487 for (; cur < buffer->rlimit; )
2488 {
2489 unsigned char c = *cur++;
2490
2491 /* People like decorating comments with '*', so check for
2492 '/' instead for efficiency. */
2493 if (c == '/')
2494 {
2495 if (cur[-2] == '*' || cur - 1 == char_after_star)
2496 goto out;
2497
2498 /* Warn about potential nested comments, but not when
2499 the final character inside the comment is a '/'.
2500 Don't bother to get it right across escaped newlines. */
2501 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2502 && cur[0] == '*' && cur[1] != '/')
2503 {
2504 buffer->cur = cur;
2505 cpp_warning (pfile, "'/*' within comment");
2506 }
2507 }
2508 else if (IS_NEWLINE(c))
2509 {
2510 const unsigned char* bslash = backslash_start (pfile, cur - 2);
2511
2512 handle_newline (cur, buffer->rlimit, c);
2513 /* Work correctly if there is an asterisk before an
2514 arbirtrarily long sequence of escaped newlines. */
2515 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2516 char_after_star = cur;
2517 else
2518 char_after_star = 0;
2519 }
2520 }
2521 seen_eof = 1;
2522
2523 out:
2524 buffer->cur = cur;
2525 return seen_eof;
2526}
2527
2528/* Skip a C++ or Chill line comment. Handles escaped newlines.
2529 Returns non-zero if a multiline comment. */
2530static int
b8f41010 2531skip_line_comment2 (pfile)
c5a04734
ZW
2532 cpp_reader *pfile;
2533{
2534 cpp_buffer *buffer = pfile->buffer;
2535 register const unsigned char *cur = buffer->cur;
2536 int multiline = 0;
2537
2538 for (; cur < buffer->rlimit; )
2539 {
2540 unsigned char c = *cur++;
2541
2542 if (IS_NEWLINE (c))
2543 {
2544 /* Check for a (trigaph?) backslash escaping the newline. */
2545 if (!backslash_start (pfile, cur - 2))
2546 goto out;
2547 multiline = 1;
2548 handle_newline (cur, buffer->rlimit, c);
2549 }
2550 }
2551 cur++;
2552
2553 out:
2554 buffer->cur = cur - 1; /* Leave newline for caller. */
2555 return multiline;
2556}
2557
6ab3e7dd
NB
2558/* Skips whitespace, stopping at next non-whitespace character.
2559 Adjusts pfile->col_adjust to account for tabs. This enables tokens
2560 to be assigned the correct column. */
c5a04734
ZW
2561static void
2562skip_whitespace (pfile, in_directive)
2563 cpp_reader *pfile;
2564 int in_directive;
2565{
2566 cpp_buffer *buffer = pfile->buffer;
2567 register const unsigned char *cur = buffer->cur;
2568 unsigned short null_count = 0;
2569
2570 for (; cur < buffer->rlimit; )
2571 {
2572 unsigned char c = *cur++;
2573
6ab3e7dd
NB
2574 if (c == '\t')
2575 {
2576 unsigned int col = CPP_BUF_COLUMN (buffer, cur - 1);
2577 pfile->col_adjust += (CPP_OPTION (pfile, tabstop) - 1
2578 - col % CPP_OPTION(pfile, tabstop));
2579 }
c5a04734
ZW
2580 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
2581 continue;
2582 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
2583 goto out;
2584 if (c == '\0')
2585 null_count++;
2586 /* Mut be '\f' or '\v' */
2587 else if (in_directive && CPP_PEDANTIC (pfile))
2588 cpp_pedwarn (pfile, "%s in preprocessing directive",
2589 c == '\f' ? "formfeed" : "vertical tab");
2590 }
2591 cur++;
2592
2593 out:
2594 buffer->cur = cur - 1;
2595 if (null_count)
2596 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2597 : "embedded null character ignored");
2598}
2599
2600/* Parse (append) an identifier. */
2601static void
2602parse_name (pfile, list, name)
2603 cpp_reader *pfile;
2604 cpp_toklist *list;
2605 cpp_name *name;
2606{
2607 const unsigned char *name_limit;
2608 unsigned char *namebuf;
2609 cpp_buffer *buffer = pfile->buffer;
2610 register const unsigned char *cur = buffer->cur;
2611
2612 expanded:
2613 name_limit = list->namebuf + list->name_cap;
2614 namebuf = list->namebuf + list->name_used;
2615
2616 for (; cur < buffer->rlimit && namebuf < name_limit; )
2617 {
2618 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2619
2620 if (! is_idchar(c))
2621 goto out;
2622 namebuf++;
2623 cur++;
2624 if (c == '$' && CPP_PEDANTIC (pfile))
2625 {
2626 buffer->cur = cur;
2627 cpp_pedwarn (pfile, "'$' character in identifier");
2628 }
2629 }
2630
2631 /* Run out of name space? */
2632 if (cur < buffer->rlimit)
2633 {
2634 list->name_used = namebuf - list->namebuf;
2635 auto_expand_name_space (list);
2636 goto expanded;
2637 }
2638
2639 out:
2640 buffer->cur = cur;
f617b8e2 2641 name->len = namebuf - name->text;
c5a04734
ZW
2642 list->name_used = namebuf - list->namebuf;
2643}
2644
2645/* Parse (append) a number. */
2646
2647#define VALID_SIGN(c, prevc) \
2648 (((c) == '+' || (c) == '-') && \
2649 ((prevc) == 'e' || (prevc) == 'E' \
2650 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2651
2652static void
2653parse_number (pfile, list, name)
2654 cpp_reader *pfile;
2655 cpp_toklist *list;
2656 cpp_name *name;
2657{
2658 const unsigned char *name_limit;
2659 unsigned char *namebuf;
2660 cpp_buffer *buffer = pfile->buffer;
2661 register const unsigned char *cur = buffer->cur;
2662
2663 expanded:
2664 name_limit = list->namebuf + list->name_cap;
2665 namebuf = list->namebuf + list->name_used;
2666
2667 for (; cur < buffer->rlimit && namebuf < name_limit; )
2668 {
2669 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2670
2671 /* Perhaps we should accept '$' here if we accept it for
2672 identifiers. We know namebuf[-1] is safe, because for c to
2673 be a sign we must have pushed at least one character. */
2674 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2675 goto out;
2676
2677 namebuf++;
2678 cur++;
2679 }
2680
2681 /* Run out of name space? */
2682 if (cur < buffer->rlimit)
2683 {
2684 list->name_used = namebuf - list->namebuf;
2685 auto_expand_name_space (list);
2686 goto expanded;
2687 }
2688
2689 out:
2690 buffer->cur = cur;
f617b8e2 2691 name->len = namebuf - name->text;
c5a04734
ZW
2692 list->name_used = namebuf - list->namebuf;
2693}
2694
2695/* Places a string terminated by an unescaped TERMINATOR into a
2696 cpp_name, which should be expandable and thus at the top of the
2697 list's stack. Handles embedded trigraphs, if necessary, and
2698 escaped newlines.
2699
2700 Can be used for character constants (terminator = '\''), string
41e8b1d7
NB
2701 constants ('"') and angled headers ('>'). Multi-line strings are
2702 allowed, except for within directives. */
c5a04734
ZW
2703
2704static void
b8f41010 2705parse_string2 (pfile, list, name, terminator)
c5a04734
ZW
2706 cpp_reader *pfile;
2707 cpp_toklist *list;
2708 cpp_name *name;
2709 unsigned int terminator;
2710{
2711 cpp_buffer *buffer = pfile->buffer;
2712 register const unsigned char *cur = buffer->cur;
2713 const unsigned char *name_limit;
2714 unsigned char *namebuf;
2715 unsigned int null_count = 0;
2716 int trigraphed_len = 0;
2717
2718 expanded:
2719 name_limit = list->namebuf + list->name_cap;
2720 namebuf = list->namebuf + list->name_used;
2721
2722 for (; cur < buffer->rlimit && namebuf < name_limit; )
2723 {
2724 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
2725
2726 if (c == '\0')
2727 null_count++;
2728 else if (c == terminator || IS_NEWLINE (c))
2729 {
c5a04734
ZW
2730 /* Needed for trigraph_replace and multiline string warning. */
2731 buffer->cur = cur;
2732
2733 /* Scan for trigraphs before checking if backslash-escaped. */
2734 if (CPP_OPTION (pfile, trigraphs)
2735 || CPP_OPTION (pfile, warn_trigraphs))
2736 {
f617b8e2 2737 namebuf = trigraph_replace (pfile, name->text + trigraphed_len,
c5a04734 2738 namebuf);
f617b8e2 2739 trigraphed_len = namebuf - 2 - (name->text + trigraphed_len);
c5a04734
ZW
2740 if (trigraphed_len < 0)
2741 trigraphed_len = 0;
2742 }
2743
2744 namebuf--; /* Drop the newline / terminator from the name. */
2745 if (IS_NEWLINE (c))
2746 {
2747 /* Drop a backslash newline, and continue. */
2748 if (namebuf[-1] == '\\')
2749 {
2750 handle_newline (cur, buffer->rlimit, c);
2751 namebuf--;
2752 continue;
2753 }
2754
2755 cur--;
2756
2757 /* In Fortran and assembly language, silently terminate
2758 strings of either variety at end of line. This is a
2759 kludge around not knowing where comments are in these
2760 languages. */
2761 if (CPP_OPTION (pfile, lang_fortran)
2762 || CPP_OPTION (pfile, lang_asm))
2763 goto out;
2764
2765 /* Character constants, headers and asserts may not
2766 extend over multiple lines. In Standard C, neither
2767 may strings. We accept multiline strings as an
2768 extension, but not in directives. */
2769 if (terminator != '"' || IS_DIRECTIVE (list))
2770 goto unterminated;
2771
2772 cur++; /* Move forwards again. */
2773
2774 if (pfile->multiline_string_line == 0)
2775 {
2776 pfile->multiline_string_line = list->line;
2777 if (CPP_PEDANTIC (pfile))
2778 cpp_pedwarn (pfile, "multi-line string constant");
2779 }
2780
2781 *namebuf++ = '\n';
2782 handle_newline (cur, buffer->rlimit, c);
2783 }
2784 else
2785 {
2786 unsigned char *temp;
2787
2788 /* An odd number of consecutive backslashes represents
2789 an escaped terminator. */
2790 temp = namebuf - 1;
f617b8e2 2791 while (temp >= name->text && *temp == '\\')
c5a04734
ZW
2792 temp--;
2793
2794 if ((namebuf - temp) & 1)
2795 goto out;
2796 namebuf++;
2797 }
2798 }
2799 }
2800
2801 /* Run out of name space? */
2802 if (cur < buffer->rlimit)
2803 {
2804 list->name_used = namebuf - list->namebuf;
2805 auto_expand_name_space (list);
2806 goto expanded;
2807 }
2808
2809 /* We may not have trigraph-replaced the input for this code path,
2810 but as the input is in error by being unterminated we don't
2811 bother. Prevent warnings about no newlines at EOF. */
2812 if (IS_NEWLINE(cur[-1]))
2813 cur--;
2814
2815 unterminated:
2816 cpp_error (pfile, "missing terminating %c character", (int) terminator);
2817
2818 if (terminator == '\"' && pfile->multiline_string_line != list->line
2819 && pfile->multiline_string_line != 0)
2820 {
2821 cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2822 "possible start of unterminated string literal");
2823 pfile->multiline_string_line = 0;
2824 }
2825
2826 out:
2827 buffer->cur = cur;
f617b8e2 2828 name->len = namebuf - name->text;
c5a04734
ZW
2829 list->name_used = namebuf - list->namebuf;
2830
2831 if (null_count > 0)
2832 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2833 : "null character preserved"));
2834}
2835
5d7ee2fa
NB
2836/* The character TYPE helps us distinguish comment types: '*' = C
2837 style, '-' = Chill-style and '/' = C++ style. For code simplicity,
2838 the stored comment includes the comment start and any terminator. */
2839
2840#define COMMENT_START_LEN 2
c5a04734 2841static void
b8f41010 2842save_comment (list, from, len, tok_no, type)
c5a04734
ZW
2843 cpp_toklist *list;
2844 const unsigned char *from;
2845 unsigned int len;
2846 unsigned int tok_no;
2847 unsigned int type;
2848{
2849 cpp_token *comment;
5d7ee2fa
NB
2850 unsigned char *buffer;
2851
2852 len += COMMENT_START_LEN;
c5a04734
ZW
2853
2854 if (list->comments_used == list->comments_cap)
2855 expand_comment_space (list);
2856
2857 if (list->name_used + len > list->name_cap)
2858 expand_name_space (list, len);
2859
f617b8e2
NB
2860 buffer = list->namebuf + list->name_used;
2861
c5a04734 2862 comment = &list->comments[list->comments_used++];
5d7ee2fa 2863 comment->type = CPP_COMMENT;
c5a04734
ZW
2864 comment->aux = tok_no;
2865 comment->val.name.len = len;
f617b8e2 2866 comment->val.name.text = buffer;
c5a04734 2867
5d7ee2fa
NB
2868 if (type == '*')
2869 {
2870 *buffer++ = '/';
2871 *buffer++ = '*';
2872 }
2873 else
2874 {
2875 *buffer++ = type;
2876 *buffer++ = type;
2877 }
2878
2879 memcpy (buffer, from, len - COMMENT_START_LEN);
c5a04734
ZW
2880 list->name_used += len;
2881}
2882
2883/*
2884 * The tokenizer's main loop. Returns a token list, representing a
2885 * logical line in the input file, terminated with a CPP_VSPACE
2886 * token. On EOF, a token list containing the single CPP_EOF token
2887 * is returned.
2888 *
2889 * Implementation relies almost entirely on lookback, rather than
2890 * looking forwards. This means that tokenization requires just
2891 * a single pass of the file, even in the presence of trigraphs and
2892 * escaped newlines, providing significant performance benefits.
2893 * Trigraph overhead is negligible if they are disabled, and low
2894 * even when enabled.
2895 */
2896
c5a04734
ZW
2897void
2898_cpp_lex_line (pfile, list)
2899 cpp_reader *pfile;
2900 cpp_toklist *list;
2901{
2902 cpp_token *cur_token, *token_limit;
2903 cpp_buffer *buffer = pfile->buffer;
2904 register const unsigned char *cur = buffer->cur;
2905 unsigned char flags = 0;
2906
6ab3e7dd 2907 pfile->col_adjust = 0;
c5a04734
ZW
2908 expanded:
2909 token_limit = list->tokens + list->tokens_cap;
2910 cur_token = list->tokens + list->tokens_used;
2911
2912 for (; cur < buffer->rlimit && cur_token < token_limit;)
2913 {
2914 unsigned char c = *cur++;
2915
6ab3e7dd
NB
2916 /* Optimize whitespace skipping, as most tokens are probably
2917 separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
2918
c5a04734
ZW
2919 if (is_hspace ((unsigned int) c))
2920 {
6ab3e7dd
NB
2921 /* Step back to get the null warning and tab correction. */
2922 buffer->cur = cur - 1;
2923 skip_whitespace (pfile, IS_DIRECTIVE (list));
2924 cur = buffer->cur;
2925
c5a04734
ZW
2926 flags = PREV_WHITESPACE;
2927 if (cur == buffer->rlimit)
2928 break;
2929 c = *cur++;
2930 }
2931
2932 /* Initialize current token. Its type is set in the switch. */
6ab3e7dd 2933 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
c5a04734
ZW
2934 cur_token->flags = flags;
2935 flags = 0;
2936
2937 switch (c)
2938 {
2939 case '0': case '1': case '2': case '3': case '4':
2940 case '5': case '6': case '7': case '8': case '9':
f617b8e2 2941 cur--; /* Backup character. */
c5a04734
ZW
2942 if (PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ())
2943 {
f617b8e2 2944 /* Prepend an immediately previous CPP_DOT token. */
c5a04734
ZW
2945 cur_token--;
2946 if (list->name_cap == list->name_used)
2947 auto_expand_name_space (list);
2948
2949 cur_token->val.name.len = 1;
f617b8e2 2950 cur_token->val.name.text = list->namebuf + list->name_used;
c5a04734
ZW
2951 list->namebuf[list->name_used++] = '.';
2952 }
2953 else
2954 INIT_NAME (list, cur_token->val.name);
c5a04734
ZW
2955
2956 continue_number:
2957 buffer->cur = cur;
2958 parse_number (pfile, list, &cur_token->val.name);
2959 cur = buffer->cur;
2960
2961 PUSH_TOKEN (CPP_NUMBER); /* Number not yet interpreted. */
2962 break;
2963
2964 letter:
2965 case '_':
2966 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2967 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2968 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2969 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2970 case 'y': case 'z':
2971 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2972 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2973 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2974 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2975 case 'Y': case 'Z':
c5a04734 2976 cur--; /* Backup character. */
f617b8e2 2977 INIT_NAME (list, cur_token->val.name);
c5a04734
ZW
2978 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
2979
2980 continue_name:
2981 buffer->cur = cur;
2982 parse_name (pfile, list, &cur_token->val.name);
2983 cur = buffer->cur;
2984
2985 /* Find handler for newly created / extended directive. */
2986 if (IS_DIRECTIVE (list) && cur_token == &list->tokens[1])
2987 _cpp_check_directive (list, cur_token);
2988 cur_token++;
2989 break;
2990
2991 case '\'':
2992 /* Fall through. */
2993 case '\"':
2994 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2995 /* Do we have a wide string? */
2996 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2997 && cur_token[-1].val.name.len == 1
f617b8e2 2998 && cur_token[-1].val.name.text[0] == 'L'
c5a04734
ZW
2999 && !CPP_TRADITIONAL (pfile))
3000 {
3001 /* No need for 'L' any more. */
3002 list->name_used--;
3003 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
3004 }
3005
3006 do_parse_string:
6ab3e7dd 3007 /* Here c is one of ' " or >. */
c5a04734
ZW
3008 INIT_NAME (list, cur_token->val.name);
3009 buffer->cur = cur;
b8f41010 3010 parse_string2 (pfile, list, &cur_token->val.name, c);
c5a04734
ZW
3011 cur = buffer->cur;
3012 cur_token++;
3013 break;
3014
3015 case '/':
3016 cur_token->type = CPP_DIV;
3017 if (IMMED_TOKEN ())
3018 {
3019 if (PREV_TOKEN_TYPE == CPP_DIV)
3020 {
3021 /* We silently allow C++ comments in system headers,
3022 irrespective of conformance mode, because lots of
3023 broken systems do that and trying to clean it up
3024 in fixincludes is a nightmare. */
3025 if (buffer->system_header_p)
3026 goto do_line_comment;
3027 else if (CPP_OPTION (pfile, cplusplus_comments))
3028 {
3029 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
3030 && ! buffer->warned_cplusplus_comments)
3031 {
3032 buffer->cur = cur;
3033 cpp_pedwarn (pfile,
3034 "C++ style comments are not allowed in ISO C89");
3035 cpp_pedwarn (pfile,
3036 "(this will be reported only once per input file)");
3037 buffer->warned_cplusplus_comments = 1;
3038 }
3039 do_line_comment:
3040 buffer->cur = cur;
3041 if (cur[-2] != c)
3042 cpp_warning (pfile,
3043 "comment start split across lines");
b8f41010 3044 if (skip_line_comment2 (pfile))
c5a04734
ZW
3045 cpp_error_with_line (pfile, list->line,
3046 cur_token[-1].col,
3047 "multi-line comment");
3048 if (!CPP_OPTION (pfile, discard_comments))
b8f41010 3049 save_comment (list, cur, buffer->cur - cur,
5d7ee2fa 3050 cur_token - 1 - list->tokens, c);
c5a04734
ZW
3051 cur = buffer->cur;
3052
3053 /* Back-up to first '-' or '/'. */
3054 cur_token -= 2;
3055 if (!CPP_OPTION (pfile, traditional))
3056 flags = PREV_WHITESPACE;
3057 }
3058 }
3059 }
3060 cur_token++;
3061 break;
3062
3063 case '*':
3064 cur_token->type = CPP_MULT;
3065 if (IMMED_TOKEN ())
3066 {
3067 if (PREV_TOKEN_TYPE == CPP_DIV)
3068 {
3069 buffer->cur = cur;
3070 if (cur[-2] != '/')
3071 cpp_warning (pfile,
3072 "comment start '/*' split across lines");
b8f41010 3073 if (skip_block_comment2 (pfile))
c5a04734
ZW
3074 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
3075 "unterminated comment");
3076 else if (buffer->cur[-2] != '*')
3077 cpp_warning (pfile,
3078 "comment end '*/' split across lines");
3079 if (!CPP_OPTION (pfile, discard_comments))
b8f41010 3080 save_comment (list, cur, buffer->cur - cur,
5d7ee2fa 3081 cur_token - 1 - list->tokens, c);
c5a04734
ZW
3082 cur = buffer->cur;
3083
f617b8e2 3084 cur_token--;
c5a04734
ZW
3085 if (!CPP_OPTION (pfile, traditional))
3086 flags = PREV_WHITESPACE;
f617b8e2 3087 break;
c5a04734
ZW
3088 }
3089 else if (CPP_OPTION (pfile, cplusplus))
3090 {
3091 /* In C++, there are .* and ->* operators. */
3092 if (PREV_TOKEN_TYPE == CPP_DEREF)
3093 BACKUP_TOKEN (CPP_DEREF_STAR);
3094 else if (PREV_TOKEN_TYPE == CPP_DOT)
3095 BACKUP_TOKEN (CPP_DOT_STAR);
3096 }
3097 }
3098 cur_token++;
3099 break;
3100
3101 case '\n':
3102 case '\r':
3103 handle_newline (cur, buffer->rlimit, c);
fb4527c3 3104 if (PREV_TOKEN_TYPE == CPP_BACKSLASH && IMMED_TOKEN ())
c5a04734 3105 {
fb4527c3
NB
3106 /* Remove the escaped newline. Then continue to process
3107 any interrupted name or number. */
3108 cur_token--;
3109 if (IMMED_TOKEN ())
c5a04734 3110 {
fb4527c3
NB
3111 cur_token--;
3112 if (cur_token->type == CPP_NAME)
3113 goto continue_name;
3114 else if (cur_token->type == CPP_NUMBER)
3115 goto continue_number;
3116 cur_token++;
c5a04734 3117 }
fb4527c3
NB
3118 /* Remember whitespace setting. */
3119 flags = cur_token->flags;
3120 break;
c5a04734 3121 }
fb4527c3 3122 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
c5a04734 3123 {
fb4527c3
NB
3124 buffer->cur = cur;
3125 cpp_warning (pfile, "backslash and newline separated by space");
c5a04734 3126 }
fb4527c3
NB
3127 PUSH_TOKEN (CPP_VSPACE);
3128 goto out;
c5a04734
ZW
3129
3130 case '-':
3131 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3132 {
3133 if (CPP_OPTION (pfile, chill))
3134 goto do_line_comment;
3135 REVISE_TOKEN (CPP_MINUS_MINUS);
3136 }
3137 else
3138 PUSH_TOKEN (CPP_MINUS);
3139 break;
3140
3141 /* The digraph flag checking ensures that ## and %:%:
3142 are interpreted as CPP_PASTE, but #%: and %:# are not. */
3143 make_hash:
3144 case '#':
3145 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3146 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3147 REVISE_TOKEN (CPP_PASTE);
3148 else
3149 PUSH_TOKEN (CPP_HASH);
3150 break;
3151
3152 case ':':
3153 cur_token->type = CPP_COLON;
3154 if (IMMED_TOKEN ())
3155 {
3156 if (PREV_TOKEN_TYPE == CPP_COLON
3157 && CPP_OPTION (pfile, cplusplus))
3158 BACKUP_TOKEN (CPP_SCOPE);
3159 /* Digraph: "<:" is a '[' */
3160 else if (PREV_TOKEN_TYPE == CPP_LESS)
3161 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3162 /* Digraph: "%:" is a '#' */
3163 else if (PREV_TOKEN_TYPE == CPP_MOD)
3164 {
3165 (--cur_token)->flags |= DIGRAPH;
3166 goto make_hash;
3167 }
3168 }
3169 cur_token++;
3170 break;
3171
3172 case '&':
3173 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3174 REVISE_TOKEN (CPP_AND_AND);
3175 else
3176 PUSH_TOKEN (CPP_AND);
3177 break;
3178
3179 make_or:
3180 case '|':
3181 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3182 REVISE_TOKEN (CPP_OR_OR);
3183 else
3184 PUSH_TOKEN (CPP_OR);
3185 break;
3186
3187 case '+':
3188 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3189 REVISE_TOKEN (CPP_PLUS_PLUS);
3190 else
3191 PUSH_TOKEN (CPP_PLUS);
3192 break;
3193
3194 case '=':
3195 /* This relies on equidistance of "?=" and "?" tokens. */
3196 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3197 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3198 else
3199 PUSH_TOKEN (CPP_EQ);
3200 break;
3201
3202 case '>':
3203 cur_token->type = CPP_GREATER;
3204 if (IMMED_TOKEN ())
3205 {
3206 if (PREV_TOKEN_TYPE == CPP_GREATER)
3207 BACKUP_TOKEN (CPP_RSHIFT);
3208 else if (PREV_TOKEN_TYPE == CPP_MINUS)
3209 BACKUP_TOKEN (CPP_DEREF);
3210 /* Digraph: ":>" is a ']' */
3211 else if (PREV_TOKEN_TYPE == CPP_COLON)
3212 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3213 /* Digraph: "%>" is a '}' */
3214 else if (PREV_TOKEN_TYPE == CPP_MOD)
3215 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
3216 }
3217 cur_token++;
3218 break;
3219
3220 case '<':
3221 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3222 {
3223 REVISE_TOKEN (CPP_LSHIFT);
3224 break;
3225 }
3226 /* Is this the beginning of a header name? */
15dad1d9 3227 if (list->flags & SYNTAX_INCLUDE)
c5a04734
ZW
3228 {
3229 c = '>'; /* Terminator. */
3230 cur_token->type = CPP_HEADER_NAME;
3231 goto do_parse_string;
3232 }
3233 PUSH_TOKEN (CPP_LESS);
3234 break;
3235
3236 case '%':
3237 /* Digraph: "<%" is a '{' */
3238 cur_token->type = CPP_MOD;
3239 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3240 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
3241 cur_token++;
3242 break;
3243
c5a04734
ZW
3244 case '?':
3245 if (cur + 1 < buffer->rlimit && *cur == '?'
3246 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3247 {
3248 /* Handle trigraph. */
3249 cur++;
3250 switch (*cur++)
3251 {
3252 case '(': goto make_open_square;
3253 case ')': goto make_close_square;
3254 case '<': goto make_open_brace;
3255 case '>': goto make_close_brace;
3256 case '=': goto make_hash;
3257 case '!': goto make_or;
3258 case '-': goto make_complement;
3259 case '/': goto make_backslash;
3260 case '\'': goto make_xor;
3261 }
3262 }
3263 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3264 {
3265 /* GNU C++ defines <? and >? operators. */
3266 if (PREV_TOKEN_TYPE == CPP_LESS)
3267 {
3268 REVISE_TOKEN (CPP_MIN);
3269 break;
3270 }
3271 else if (PREV_TOKEN_TYPE == CPP_GREATER)
3272 {
3273 REVISE_TOKEN (CPP_MAX);
3274 break;
3275 }
3276 }
3277 PUSH_TOKEN (CPP_QUERY);
3278 break;
3279
3280 case '.':
3281 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3282 && IMMED_TOKEN ()
3283 && !(cur_token[-1].flags & PREV_WHITESPACE))
3284 {
3285 cur_token -= 2;
3286 PUSH_TOKEN (CPP_ELLIPSIS);
3287 }
3288 else
3289 PUSH_TOKEN (CPP_DOT);
3290 break;
3291
cfd5b8b8
NB
3292 make_complement:
3293 case '~': PUSH_TOKEN (CPP_COMPL); break;
c5a04734
ZW
3294 make_xor:
3295 case '^': PUSH_TOKEN (CPP_XOR); break;
3296 make_open_brace:
3297 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3298 make_close_brace:
3299 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3300 make_open_square:
3301 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3302 make_close_square:
3303 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3304 make_backslash:
3305 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3306 case '!': PUSH_TOKEN (CPP_NOT); break;
3307 case ',': PUSH_TOKEN (CPP_COMMA); break;
3308 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
41e8b1d7 3309 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
cfd5b8b8 3310 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
c5a04734
ZW
3311
3312 case '$':
3313 if (CPP_OPTION (pfile, dollars_in_ident))
3314 goto letter;
3315 /* Fall through */
3316 default:
3317 cur_token->aux = c;
f617b8e2 3318 cur_token->val.name.len = 0; /* FIXME: needed for transition only */
c5a04734
ZW
3319 PUSH_TOKEN (CPP_OTHER);
3320 break;
3321 }
3322 }
3323
3324 /* Run out of token space? */
3325 if (cur_token == token_limit)
3326 {
3327 list->tokens_used = cur_token - list->tokens;
3328 expand_token_space (list);
3329 goto expanded;
3330 }
3331
3332 cur_token->type = CPP_EOF;
3333 cur_token->flags = flags;
3334
3335 if (cur_token != &list->tokens[0])
3336 {
3337 /* Next call back will get just a CPP_EOF. */
3338 buffer->cur = cur;
3339 cpp_warning (pfile, "no newline at end of file");
3340 PUSH_TOKEN (CPP_VSPACE);
3341 }
3342
3343 out:
3344 buffer->cur = cur;
3345
3346 list->tokens_used = cur_token - list->tokens;
3347
3348 /* FIXME: take this check out and put it in the caller.
3349 list->directive == 0 indicates an unknown directive (but null
3350 directive is OK). This is the first time we can be sure the
3351 directive is invalid, and thus warn about it, because it might
3352 have been split by escaped newlines. Also, don't complain about
3353 invalid directives in assembly source, we don't know where the
3354 comments are, and # may introduce assembler pseudo-ops. */
3355
15dad1d9 3356 if (IS_DIRECTIVE (list) && list->dirno == -1
c5a04734
ZW
3357 && list->tokens[1].type != CPP_VSPACE
3358 && !CPP_OPTION (pfile, lang_asm))
3359 cpp_error_with_line (pfile, list->line, list->tokens[1].col,
3360 "invalid preprocessing directive");
3361}
3362
3fef5b2b
NB
3363/* Write the spelling of a token TOKEN to BUFFER. The buffer must
3364 already contain the enough space to hold the token's spelling. If
3365 WHITESPACE is true, and the token was preceded by whitespace,
3366 output a single space before the token proper. Returns a pointer
3367 to the character after the last character written. */
3368
3369static unsigned char *
f617b8e2 3370spell_token (pfile, token, buffer, whitespace)
3fef5b2b
NB
3371 cpp_reader *pfile; /* Would be nice to be rid of this... */
3372 cpp_token *token;
3fef5b2b
NB
3373 unsigned char *buffer;
3374 int whitespace;
3375{
3376 /* Whitespace will not be wanted by handlers of the # and ##
3377 operators calling this function, but will be wanted by the
3378 function that writes out the preprocessed file. */
3379 if (whitespace && token->flags & PREV_WHITESPACE)
3380 *buffer++ = ' ';
3381
3382 switch (token_spellings[token->type].type)
3383 {
5d7ee2fa 3384 case SPELL_OPERATOR:
3fef5b2b
NB
3385 {
3386 const unsigned char *spelling;
3387 unsigned char c;
3388
3389 if (token->flags & DIGRAPH)
3390 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
3391 else
f617b8e2 3392 spelling = token_spellings[token->type].spelling;
3fef5b2b
NB
3393
3394 while ((c = *spelling++) != '\0')
3395 *buffer++ = c;
3396 }
3397 break;
3398
5d7ee2fa 3399 case SPELL_IDENT:
f617b8e2 3400 memcpy (buffer, token->val.name.text, token->val.name.len);
5d7ee2fa
NB
3401 buffer += token->val.name.len;
3402 break;
3403
3404 case SPELL_STRING:
3fef5b2b 3405 {
5d7ee2fa 3406 unsigned char c;
3fef5b2b 3407
5d7ee2fa
NB
3408 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
3409 *buffer++ = 'L';
3410 c = '\'';
3411 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
3412 c = '"';
3413 *buffer++ = c;
f617b8e2 3414 memcpy (buffer, token->val.name.text, token->val.name.len);
5d7ee2fa
NB
3415 buffer += token->val.name.len;
3416 *buffer++ = c;
3fef5b2b
NB
3417 }
3418 break;
3419
3420 case SPELL_CHAR:
3421 *buffer++ = token->aux;
3422 break;
3423
3424 case SPELL_NONE:
3425 cpp_ice (pfile, "Unspellable token");
3426 break;
3427 }
3428
3429 return buffer;
3430}
3431
3432/* Temporary function for illustrative purposes. */
c5a04734
ZW
3433void
3434_cpp_lex_file (pfile)
3435 cpp_reader* pfile;
3436{
c5a04734
ZW
3437 cpp_toklist* list;
3438
3439 init_trigraph_map ();
3440 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
15dad1d9 3441 _cpp_init_toklist (list);
c5a04734 3442
15dad1d9 3443 for (;;)
c5a04734 3444 {
c5a04734
ZW
3445 _cpp_lex_line (pfile, list);
3446 if (list->tokens[0].type == CPP_EOF)
3447 break;
3448
15dad1d9
ZW
3449#if 0
3450 if (list->dirno)
3451 _cpp_handle_directive (pfile, list);
c5a04734 3452 else
15dad1d9 3453#endif
c5a04734 3454 _cpp_output_list (pfile, list);
15dad1d9 3455 _cpp_clear_toklist (list);
c5a04734
ZW
3456 }
3457}
3458
b8f41010 3459/* Temporary function for illustrative purposes. */
c5a04734
ZW
3460static void
3461_cpp_output_list (pfile, list)
3462 cpp_reader *pfile;
3463 cpp_toklist *list;
3464{
5d7ee2fa 3465 cpp_token *token, *comment, *comment_before = 0;
c5a04734
ZW
3466
3467 if (list->comments_used > 0)
5d7ee2fa
NB
3468 {
3469 comment = &list->comments[0];
3470 comment_before = &list->tokens[comment->aux];
3471 }
c5a04734 3472
3fef5b2b
NB
3473 token = &list->tokens[0];
3474 do
c5a04734 3475 {
3fef5b2b 3476 /* Output comments if -C. */
5d7ee2fa 3477 while (token == comment_before)
c5a04734 3478 {
5d7ee2fa
NB
3479 /* Make space for the comment, and copy it out. */
3480 CPP_RESERVE (pfile, TOKEN_LEN (comment));
f617b8e2 3481 pfile->limit = spell_token (pfile, comment, pfile->limit, 0);
5d7ee2fa
NB
3482
3483 /* Stop if no comments left, or no more comments appear
3484 before the current token. */
3485 comment++;
3486 if (comment == list->comments + list->comments_used)
3487 break;
3488 comment_before = &list->tokens[comment->aux];
c5a04734
ZW
3489 }
3490
3fef5b2b 3491 CPP_RESERVE (pfile, TOKEN_LEN (token));
f617b8e2 3492 pfile->limit = spell_token (pfile, token, pfile->limit, 1);
c5a04734 3493 }
3fef5b2b 3494 while (token++->type != CPP_VSPACE);
c5a04734
ZW
3495}
3496
3497#endif
This page took 0.500896 seconds and 5 git commands to generate.