]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
ia64.c (ia64_encode_section_info): Exit early for global register variables...
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23#include "config.h"
24#include "system.h"
25#include "intl.h"
26#include "cpplib.h"
27#include "cpphash.h"
28
ff2b53ef
ZW
/* Raw character access on a cpp_buffer.  GETBUF yields the byte at the
   read pointer and steps past it; PEEKBUF yields the byte N positions
   past the read pointer without consuming anything.  Both evaluate to
   EOF rather than reading at or beyond rlimit.  FORWARDBUF moves the
   read pointer by N (callers also pass negative values to back up) and
   performs no bounds check.  */
#define GETBUF(BUFFER) \
  ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
#define PEEKBUF(BUFFER, N) \
  ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
#define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))

/* The same operations on the current buffer of the cpp_reader that is
   named `pfile' in the enclosing scope.  */
#define GETC() GETBUF (CPP_BUFFER (pfile))
#define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
#define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
#define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
45b966db
ZW
39
40static void skip_block_comment PARAMS ((cpp_reader *));
41static void skip_line_comment PARAMS ((cpp_reader *));
42static int maybe_macroexpand PARAMS ((cpp_reader *, long));
43static int skip_comment PARAMS ((cpp_reader *, int));
44static int copy_comment PARAMS ((cpp_reader *, int));
45static void skip_string PARAMS ((cpp_reader *, int));
46static void parse_string PARAMS ((cpp_reader *, int));
47static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
64aaf407 48static void null_warning PARAMS ((cpp_reader *, unsigned int));
45b966db 49
f2d5f0cc
ZW
50static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
51 size_t, FILE *));
1368ee70
ZW
52static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
53 unsigned int));
54static void bump_column PARAMS ((cpp_printer *, unsigned int,
55 unsigned int));
c5a04734 56static void expand_name_space PARAMS ((cpp_toklist *, unsigned int));
1368ee70
ZW
57static void expand_token_space PARAMS ((cpp_toklist *));
58static void init_token_list PARAMS ((cpp_reader *, cpp_toklist *, int));
9e62c811
ZW
59static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
60 unsigned int));
f2d5f0cc 61
c5a04734
ZW
62#define auto_expand_name_space(list) \
63 expand_name_space ((list), (list)->name_cap / 2)
64
b8f41010
NB
65#ifdef NEW_LEXER
66
67static void expand_comment_space PARAMS ((cpp_toklist *));
68void init_trigraph_map PARAMS ((void));
69static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
70 unsigned char *));
71static const unsigned char *backslash_start PARAMS ((cpp_reader *,
72 const unsigned char *));
73static int skip_block_comment2 PARAMS ((cpp_reader *));
74static int skip_line_comment2 PARAMS ((cpp_reader *));
75static void skip_whitespace PARAMS ((cpp_reader *, int));
76static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
77static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
78static void parse_string2 PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
79 unsigned int));
80static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
81static void save_comment PARAMS ((cpp_toklist *, const unsigned char *,
82 unsigned int, unsigned int, unsigned int));
83void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
84
85static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
86
87unsigned int spell_string PARAMS ((unsigned char *, cpp_toklist *,
88 cpp_token *token));
89unsigned int spell_comment PARAMS ((unsigned char *, cpp_toklist *,
90 cpp_token *token));
91unsigned int spell_name PARAMS ((unsigned char *, cpp_toklist *,
92 cpp_token *token));
93
94typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
95 cpp_token *));
96
97/* Macros on a cpp_name. */
98#define INIT_NAME(list, name) \
99 do {(name).len = 0; (name).offset = (list)->name_used;} while (0)
100
101#define IS_DIRECTIVE(list) (TOK_TYPE (list, 0) == CPP_HASH)
102#define COLUMN(cur) ((cur) - buffer->line_base)
103
104/* Maybe put these in the ISTABLE eventually. */
105#define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
106#define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
107
/* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
   character, if any, is in buffer.

   C is the newline character that was just consumed and CUR points at
   the byte after it.  If that byte lies below LIMIT and is the other
   newline character ('\r' + '\n' - C maps each of the two onto the
   other), it is consumed as part of the same line break.  The line is
   then bumped at CUR via CPP_BUMP_LINE_CUR, which expects `pfile' in
   the enclosing scope.

   C is parenthesized so that an expression argument (for example a
   conditional expression) cannot re-associate with the subtraction.  */
#define handle_newline(cur, limit, c) \
  do {\
    if ((cur) < (limit) && *(cur) == '\r' + '\n' - (c)) \
      (cur)++; \
    CPP_BUMP_LINE_CUR (pfile, (cur)); \
  } while (0)
116
117#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
118#define PREV_TOKEN_TYPE (cur_token[-1].type)
119
120#define SPELL_TEXT 0
121#define SPELL_HANDLER 1
122#define SPELL_CHAR 2
123#define SPELL_NONE 3
124#define SPELL_EOL 4
125
126#define T(e, s) {SPELL_TEXT, s},
127#define H(e, s) {SPELL_HANDLER, (PTR) s},
128#define C(e, s) {SPELL_CHAR, s},
129#define N(e, s) {SPELL_NONE, s},
130#define E(e, s) {SPELL_EOL, s},
131
132static const struct token_spelling
133{
134 unsigned char type;
135 PTR speller;
136} token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
137
138#undef T
139#undef H
140#undef C
141#undef N
142#undef E
143
/* Token-list cursor helpers.  Each acts on a `cur_token' pointer in the
   enclosing scope:
     PUSH_TOKEN     stores TTYPE in the current slot, then advances;
     REVISE_TOKEN   overwrites the type of the slot before the cursor;
     BACKUP_TOKEN   steps the cursor back, then stores TTYPE there;
     BACKUP_DIGRAPH does BACKUP_TOKEN and also sets the DIGRAPH flag.
   Arguments and whole expansions are parenthesized so the macros can be
   used inside larger expressions without changing what gets stored.  */
#define PUSH_TOKEN(ttype) (cur_token++->type = (ttype))
#define REVISE_TOKEN(ttype) (cur_token[-1].type = (ttype))
#define BACKUP_TOKEN(ttype) ((--cur_token)->type = (ttype))
#define BACKUP_DIGRAPH(ttype) do { \
  BACKUP_TOKEN (ttype); cur_token->flags |= DIGRAPH;} while (0)

/* If there is this many bytes in a buffer, you have enough room to
   spell the token, not including preceding whitespace.  TOKEN is a
   pointer to the token; it is parenthesized so any pointer expression
   may be passed.  */
#define TOKEN_LEN(token) (4 + (token_spellings[(token)->type].type == \
                               SPELL_HANDLER ? (token)->val.name.len: 0))
154
155#endif
156
45b966db
ZW
157/* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
158
159void
160_cpp_grow_token_buffer (pfile, n)
161 cpp_reader *pfile;
162 long n;
163{
164 long old_written = CPP_WRITTEN (pfile);
165 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
166 pfile->token_buffer = (U_CHAR *)
167 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
168 CPP_SET_WRITTEN (pfile, old_written);
169}
170
45b966db
ZW
171/* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
172 If BUFFER != NULL, then use the LENGTH characters in BUFFER
173 as the new input buffer.
174 Return the new buffer, or NULL on failure. */
175
176cpp_buffer *
177cpp_push_buffer (pfile, buffer, length)
178 cpp_reader *pfile;
179 const U_CHAR *buffer;
180 long length;
181{
182 cpp_buffer *buf = CPP_BUFFER (pfile);
183 cpp_buffer *new;
184 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
185 {
186 cpp_fatal (pfile, "macro or `#include' recursion too deep");
187 return NULL;
188 }
189
190 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
191
192 new->if_stack = pfile->if_stack;
45b966db 193 new->buf = new->cur = buffer;
ff2b53ef 194 new->rlimit = buffer + length;
45b966db 195 new->prev = buf;
ff2b53ef 196 new->mark = NULL;
45b966db
ZW
197 new->line_base = NULL;
198
199 CPP_BUFFER (pfile) = new;
200 return new;
201}
202
203cpp_buffer *
204cpp_pop_buffer (pfile)
205 cpp_reader *pfile;
206{
207 cpp_buffer *buf = CPP_BUFFER (pfile);
208 if (ACTIVE_MARK_P (pfile))
209 cpp_ice (pfile, "mark active in cpp_pop_buffer");
c56c2073
ZW
210
211 if (buf->ihash)
212 {
213 _cpp_unwind_if_stack (pfile, buf);
214 if (buf->buf)
215 free ((PTR) buf->buf);
216 if (pfile->system_include_depth)
217 pfile->system_include_depth--;
218 if (pfile->potential_control_macro)
219 {
220 buf->ihash->control_macro = pfile->potential_control_macro;
221 pfile->potential_control_macro = 0;
222 }
223 pfile->input_stack_listing_current = 0;
224 }
225 else if (buf->macro)
226 {
227 HASHNODE *m = buf->macro;
228
229 m->disabled = 0;
230 if ((m->type == T_FMACRO && buf->mapped)
231 || m->type == T_SPECLINE || m->type == T_FILE
232 || m->type == T_BASE_FILE || m->type == T_INCLUDE_LEVEL
233 || m->type == T_STDC)
234 free ((PTR) buf->buf);
235 }
45b966db
ZW
236 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
237 free (buf);
238 pfile->buffer_stack_depth--;
239 return CPP_BUFFER (pfile);
240}
241
f2d5f0cc
ZW
242/* Deal with the annoying semantics of fwrite. */
243static void
244safe_fwrite (pfile, buf, len, fp)
245 cpp_reader *pfile;
246 const U_CHAR *buf;
247 size_t len;
248 FILE *fp;
249{
250 size_t count;
45b966db 251
f2d5f0cc
ZW
252 while (len)
253 {
254 count = fwrite (buf, 1, len, fp);
255 if (count == 0)
256 goto error;
257 len -= count;
258 buf += count;
259 }
260 return;
261
262 error:
263 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
264}
265
266/* Notify the compiler proper that the current line number has jumped,
267 or the current file name has changed. */
268
269static void
1368ee70 270output_line_command (pfile, print, line)
45b966db 271 cpp_reader *pfile;
f2d5f0cc 272 cpp_printer *print;
1368ee70 273 unsigned int line;
45b966db 274{
1368ee70 275 cpp_buffer *ip = cpp_file_buffer (pfile);
f2d5f0cc
ZW
276 enum { same = 0, enter, leave, rname } change;
277 static const char * const codes[] = { "", " 1", " 2", "" };
278
279 if (CPP_OPTION (pfile, no_line_commands))
280 return;
281
f2d5f0cc
ZW
282 /* Determine whether the current filename has changed, and if so,
283 how. 'nominal_fname' values are unique, so they can be compared
284 by comparing pointers. */
285 if (ip->nominal_fname == print->last_fname)
286 change = same;
287 else
45b966db 288 {
f2d5f0cc
ZW
289 if (pfile->buffer_stack_depth == print->last_bsd)
290 change = rname;
291 else
45b966db 292 {
f2d5f0cc
ZW
293 if (pfile->buffer_stack_depth > print->last_bsd)
294 change = enter;
295 else
296 change = leave;
297 print->last_bsd = pfile->buffer_stack_depth;
45b966db 298 }
f2d5f0cc 299 print->last_fname = ip->nominal_fname;
45b966db 300 }
f2d5f0cc
ZW
301 /* If the current file has not changed, we can output a few newlines
302 instead if we want to increase the line number by a small amount.
303 We cannot do this if print->lineno is zero, because that means we
304 haven't output any line commands yet. (The very first line
305 command output is a `same_file' command.) */
306 if (change == same && print->lineno != 0
307 && line >= print->lineno && line < print->lineno + 8)
45b966db 308 {
f2d5f0cc 309 while (line > print->lineno)
45b966db 310 {
f2d5f0cc
ZW
311 putc ('\n', print->outf);
312 print->lineno++;
45b966db 313 }
f2d5f0cc 314 return;
45b966db 315 }
f2d5f0cc
ZW
316
317#ifndef NO_IMPLICIT_EXTERN_C
318 if (CPP_OPTION (pfile, cplusplus))
319 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
320 codes[change],
321 ip->system_header_p ? " 3" : "",
322 (ip->system_header_p == 2) ? " 4" : "");
323 else
324#endif
325 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
326 codes[change],
327 ip->system_header_p ? " 3" : "");
328 print->lineno = line;
329}
330
331/* Write the contents of the token_buffer to the output stream, and
332 clear the token_buffer. Also handles generating line commands and
333 keeping track of file transitions. */
334
335void
336cpp_output_tokens (pfile, print)
337 cpp_reader *pfile;
338 cpp_printer *print;
339{
1368ee70
ZW
340 cpp_buffer *ip;
341
f6fab919
ZW
342 if (CPP_WRITTEN (pfile) - print->written)
343 {
344 if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
345 print->lineno++;
346 safe_fwrite (pfile, pfile->token_buffer,
347 CPP_WRITTEN (pfile) - print->written, print->outf);
348 }
1368ee70
ZW
349
350 ip = cpp_file_buffer (pfile);
351 if (ip)
352 output_line_command (pfile, print, CPP_BUF_LINE (ip));
353
f2d5f0cc 354 CPP_SET_WRITTEN (pfile, print->written);
45b966db
ZW
355}
356
1368ee70
ZW
357/* Helper for cpp_output_list - increases the column number to match
358 what we expect it to be. */
359
360static void
361bump_column (print, from, to)
362 cpp_printer *print;
363 unsigned int from, to;
364{
365 unsigned int tabs, spcs;
366 unsigned int delta = to - from;
367
368 /* Only if FROM is 0, advance by tabs. */
369 if (from == 0)
370 tabs = delta / 8, spcs = delta % 8;
371 else
372 tabs = 0, spcs = delta;
373
374 while (tabs--) putc ('\t', print->outf);
375 while (spcs--) putc (' ', print->outf);
376}
377
378/* Write out the list L onto pfile->token_buffer. This function is
379 incomplete:
380
381 1) pfile->token_buffer is not going to continue to exist.
382 2) At the moment, tokens don't carry the information described
383 in cpplib.h; they are all strings.
384 3) The list has to be a complete line, and has to be written starting
385 at the beginning of a line. */
386
387void
388cpp_output_list (pfile, print, list)
389 cpp_reader *pfile;
390 cpp_printer *print;
391 const cpp_toklist *list;
392{
393 unsigned int i;
394 unsigned int curcol = 1;
395
396 /* XXX Probably does not do what is intended. */
397 if (print->lineno != list->line)
398 output_line_command (pfile, print, list->line);
399
400 for (i = 0; i < list->tokens_used; i++)
401 {
1920de47 402 if (TOK_TYPE (list, i) == CPP_VSPACE)
1368ee70
ZW
403 {
404 output_line_command (pfile, print, list->tokens[i].aux);
405 continue;
406 }
407
1920de47 408 if (curcol < TOK_COL (list, i))
1368ee70
ZW
409 {
410 /* Insert space to bring the column to what it should be. */
1920de47
ZW
411 bump_column (print, curcol - 1, TOK_COL (list, i));
412 curcol = TOK_COL (list, i);
1368ee70
ZW
413 }
414 /* XXX We may have to insert space to prevent an accidental
415 token paste. */
1920de47
ZW
416 safe_fwrite (pfile, TOK_NAME (list, i), TOK_LEN (list, i), print->outf);
417 curcol += TOK_LEN (list, i);
1368ee70
ZW
418 }
419}
420
f2d5f0cc
ZW
421/* Scan a string (which may have escape marks), perform macro expansion,
422 and write the result to the token_buffer. */
45b966db
ZW
423
424void
f2d5f0cc 425_cpp_expand_to_buffer (pfile, buf, length)
45b966db
ZW
426 cpp_reader *pfile;
427 const U_CHAR *buf;
428 int length;
429{
c56c2073 430 cpp_buffer *stop;
f2d5f0cc 431 enum cpp_ttype token;
f6fab919 432 U_CHAR *buf1;
45b966db
ZW
433
434 if (length < 0)
435 {
436 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
437 return;
438 }
439
f6fab919
ZW
440 /* Copy the buffer, because it might be in an unsafe place - for
441 example, a sequence on the token_buffer, where the pointers will
442 be invalidated if we enlarge the token_buffer. */
443 buf1 = alloca (length);
444 memcpy (buf1, buf, length);
445
45b966db 446 /* Set up the input on the input stack. */
c56c2073
ZW
447 stop = CPP_BUFFER (pfile);
448 if (cpp_push_buffer (pfile, buf1, length) == NULL)
45b966db 449 return;
c56c2073 450 CPP_BUFFER (pfile)->has_escapes = 1;
45b966db
ZW
451
452 /* Scan the input, create the output. */
f2d5f0cc
ZW
453 for (;;)
454 {
455 token = cpp_get_token (pfile);
c56c2073 456 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
f2d5f0cc 457 break;
f2d5f0cc 458 }
45b966db
ZW
459}
460
c56c2073 461/* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
f2d5f0cc
ZW
462
463void
464cpp_scan_buffer_nooutput (pfile)
465 cpp_reader *pfile;
466{
c56c2073 467 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
f2d5f0cc
ZW
468 enum cpp_ttype token;
469 unsigned int old_written = CPP_WRITTEN (pfile);
470 /* In no-output mode, we can ignore everything but directives. */
471 for (;;)
472 {
473 if (! pfile->only_seen_white)
474 _cpp_skip_rest_of_line (pfile);
475 token = cpp_get_token (pfile);
c56c2073 476 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
f2d5f0cc 477 break;
f2d5f0cc
ZW
478 }
479 CPP_SET_WRITTEN (pfile, old_written);
480}
481
c56c2073 482/* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
f2d5f0cc
ZW
483
484void
485cpp_scan_buffer (pfile, print)
486 cpp_reader *pfile;
487 cpp_printer *print;
488{
c56c2073 489 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
f2d5f0cc
ZW
490 enum cpp_ttype token;
491
492 for (;;)
493 {
494 token = cpp_get_token (pfile);
c56c2073 495 if (token == CPP_EOF || token == CPP_VSPACE
f2d5f0cc
ZW
496 /* XXX Temporary kluge - force flush after #include only */
497 || (token == CPP_DIRECTIVE
498 && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
499 {
500 cpp_output_tokens (pfile, print);
c56c2073 501 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
f2d5f0cc 502 return;
f2d5f0cc
ZW
503 }
504 }
505}
506
45b966db
ZW
507/* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
508
509cpp_buffer *
510cpp_file_buffer (pfile)
511 cpp_reader *pfile;
512{
513 cpp_buffer *ip;
514
515 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
516 if (ip->ihash != NULL)
517 return ip;
518 return NULL;
519}
520
1368ee70
ZW
521/* Token-buffer helper functions. */
522
523/* Expand a token list's string space. */
524static void
c5a04734 525expand_name_space (list, len)
1368ee70 526 cpp_toklist *list;
c5a04734
ZW
527 unsigned int len;
528{
529 list->name_cap += len;
530 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
1368ee70
ZW
531}
532
533/* Expand the number of tokens in a list. */
534static void
535expand_token_space (list)
536 cpp_toklist *list;
537{
538 list->tokens_cap *= 2;
539 list->tokens = (cpp_token *)
c5a04734
ZW
540 xrealloc (list->tokens - 1, (list->tokens_cap + 1) * sizeof (cpp_token));
541 list->tokens++; /* Skip the dummy. */
1368ee70
ZW
542}
543
c5a04734
ZW
544/* Initialize a token list. We allocate an extra token in front of
545 the token list, as this allows us to always peek at the previous
546 token without worrying about underflowing the list. */
1368ee70
ZW
547static void
548init_token_list (pfile, list, recycle)
549 cpp_reader *pfile;
550 cpp_toklist *list;
551 int recycle;
552{
c5a04734
ZW
553 /* Recycling a used list saves 3 free-malloc pairs. */
554 if (!recycle)
1368ee70 555 {
c5a04734
ZW
556 /* Initialize token space. Put a dummy token before the start
557 that will fail matches. */
558 list->tokens_cap = 256; /* 4K's worth. */
1368ee70 559 list->tokens = (cpp_token *)
c5a04734
ZW
560 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
561 list->tokens[0].type = CPP_EOF;
562 list->tokens++;
1368ee70 563
c5a04734 564 /* Initialize name space. */
1368ee70 565 list->name_cap = 1024;
1368ee70 566 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
c5a04734
ZW
567
568 /* Only create a comment space on demand. */
569 list->comments_cap = 0;
570 list->comments = 0;
1368ee70
ZW
571 }
572
c5a04734
ZW
573 list->tokens_used = 0;
574 list->name_used = 0;
575 list->comments_used = 0;
9e62c811
ZW
576 if (pfile->buffer)
577 list->line = pfile->buffer->lineno;
1368ee70
ZW
578 list->dir_handler = 0;
579 list->dir_flags = 0;
580}
581
582/* Scan an entire line and create a token list for it. Does not
583 macro-expand or execute directives. */
584
585void
586_cpp_scan_line (pfile, list)
587 cpp_reader *pfile;
588 cpp_toklist *list;
589{
590 int i, col;
591 long written, len;
592 enum cpp_ttype type;
9e62c811 593 int space_before;
1368ee70
ZW
594
595 init_token_list (pfile, list, 1);
596
597 written = CPP_WRITTEN (pfile);
598 i = 0;
9e62c811 599 space_before = 0;
1368ee70
ZW
600 for (;;)
601 {
602 col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
603 type = _cpp_lex_token (pfile);
604 len = CPP_WRITTEN (pfile) - written;
605 CPP_SET_WRITTEN (pfile, written);
606 if (type == CPP_HSPACE)
9e62c811
ZW
607 {
608 if (CPP_PEDANTIC (pfile))
609 pedantic_whitespace (pfile, pfile->token_buffer + written, len);
610 space_before = 1;
611 continue;
612 }
0f89df67
ZW
613 else if (type == CPP_COMMENT)
614 /* Only happens when processing -traditional macro definitions.
615 Do not give this a token entry, but do not change space_before
616 either. */
617 continue;
1368ee70
ZW
618
619 if (list->tokens_used >= list->tokens_cap)
620 expand_token_space (list);
621 if (list->name_used + len >= list->name_cap)
bb1ec1d7 622 expand_name_space (list, list->name_used + len + 1 - list->name_cap);
1368ee70 623
9e62c811
ZW
624 if (type == CPP_MACRO)
625 type = CPP_NAME;
626
1368ee70 627 list->tokens_used++;
1920de47
ZW
628 TOK_TYPE (list, i) = type;
629 TOK_COL (list, i) = col;
630 TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;
9e62c811 631
1368ee70
ZW
632 if (type == CPP_VSPACE)
633 break;
634
1920de47
ZW
635 TOK_LEN (list, i) = len;
636 TOK_OFFSET (list, i) = list->name_used;
637 memcpy (TOK_NAME (list, i), CPP_PWRITTEN (pfile), len);
1368ee70
ZW
638 list->name_used += len;
639 i++;
9e62c811 640 space_before = 0;
1368ee70 641 }
1920de47 642 TOK_AUX (list, i) = CPP_BUFFER (pfile)->lineno + 1;
9e62c811
ZW
643
644 /* XXX Temporary kluge: put back the newline. */
645 FORWARD(-1);
1368ee70
ZW
646}
647
648
45b966db
ZW
649/* Skip a C-style block comment. We know it's a comment, and point is
650 at the second character of the starter. */
651static void
652skip_block_comment (pfile)
653 cpp_reader *pfile;
654{
3a2b2c7a 655 unsigned int line, col;
61474454 656 const U_CHAR *limit, *cur;
45b966db
ZW
657
658 FORWARD(1);
3a2b2c7a
ZW
659 line = CPP_BUF_LINE (CPP_BUFFER (pfile));
660 col = CPP_BUF_COL (CPP_BUFFER (pfile));
61474454
NB
661 limit = CPP_BUFFER (pfile)->rlimit;
662 cur = CPP_BUFFER (pfile)->cur;
663
664 while (cur < limit)
45b966db 665 {
61474454
NB
666 char c = *cur++;
667 if (c == '\n' || c == '\r')
45b966db
ZW
668 {
669 /* \r cannot be a macro escape marker here. */
670 if (!ACTIVE_MARK_P (pfile))
61474454
NB
671 CPP_BUMP_LINE_CUR (pfile, cur);
672 }
673 else if (c == '*')
674 {
675 /* Check for teminator. */
676 if (cur < limit && *cur == '/')
677 goto out;
678
679 /* Warn about comment starter embedded in comment. */
680 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
681 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
682 cur - CPP_BUFFER (pfile)->line_base,
683 "'/*' within comment");
45b966db 684 }
45b966db 685 }
61474454
NB
686
687 cpp_error_with_line (pfile, line, col, "unterminated comment");
688 cur--;
689 out:
690 CPP_BUFFER (pfile)->cur = cur + 1;
45b966db
ZW
691}
692
693/* Skip a C++/Chill line comment. We know it's a comment, and point
694 is at the second character of the initiator. */
695static void
696skip_line_comment (pfile)
697 cpp_reader *pfile;
698{
699 FORWARD(1);
700 for (;;)
701 {
702 int c = GETC ();
703
704 /* We don't have to worry about EOF in here. */
705 if (c == '\n')
706 {
707 /* Don't consider final '\n' to be part of comment. */
708 FORWARD(-1);
709 return;
710 }
711 else if (c == '\r')
712 {
713 /* \r cannot be a macro escape marker here. */
714 if (!ACTIVE_MARK_P (pfile))
715 CPP_BUMP_LINE (pfile);
ae79697b 716 if (CPP_OPTION (pfile, warn_comments))
45b966db
ZW
717 cpp_warning (pfile, "backslash-newline within line comment");
718 }
719 }
720}
721
722/* Skip a comment - C, C++, or Chill style. M is the first character
723 of the comment marker. If this really is a comment, skip to its
724 end and return ' '. If this is not a comment, return M (which will
725 be '/' or '-'). */
726
727static int
728skip_comment (pfile, m)
729 cpp_reader *pfile;
730 int m;
731{
732 if (m == '/' && PEEKC() == '*')
733 {
734 skip_block_comment (pfile);
735 return ' ';
736 }
737 else if (m == '/' && PEEKC() == '/')
738 {
739 if (CPP_BUFFER (pfile)->system_header_p)
740 {
741 /* We silently allow C++ comments in system headers, irrespective
742 of conformance mode, because lots of busted systems do that
743 and trying to clean it up in fixincludes is a nightmare. */
744 skip_line_comment (pfile);
745 return ' ';
746 }
ae79697b 747 else if (CPP_OPTION (pfile, cplusplus_comments))
45b966db 748 {
0f89df67 749 if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
45b966db 750 {
0f89df67
ZW
751 if (CPP_WTRADITIONAL (pfile))
752 cpp_pedwarn (pfile,
753 "C++ style comments are not allowed in traditional C");
754 else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
755 cpp_pedwarn (pfile,
756 "C++ style comments are not allowed in ISO C89");
757 if (CPP_WTRADITIONAL (pfile)
758 || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
759 cpp_pedwarn (pfile,
45b966db
ZW
760 "(this will be reported only once per input file)");
761 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
762 }
763 skip_line_comment (pfile);
764 return ' ';
765 }
766 else
767 return m;
768 }
769 else if (m == '-' && PEEKC() == '-'
ae79697b 770 && CPP_OPTION (pfile, chill))
45b966db
ZW
771 {
772 skip_line_comment (pfile);
773 return ' ';
774 }
775 else
776 return m;
777}
778
779/* Identical to skip_comment except that it copies the comment into the
780 token_buffer. This is used if !discard_comments. */
781static int
782copy_comment (pfile, m)
783 cpp_reader *pfile;
784 int m;
785{
786 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
787 const U_CHAR *limit;
788
789 if (skip_comment (pfile, m) == m)
790 return m;
791
792 limit = CPP_BUFFER (pfile)->cur;
793 CPP_RESERVE (pfile, limit - start + 2);
794 CPP_PUTC_Q (pfile, m);
795 for (; start <= limit; start++)
796 if (*start != '\r')
797 CPP_PUTC_Q (pfile, *start);
798
799 return ' ';
800}
801
64aaf407
NB
802static void
803null_warning (pfile, count)
804 cpp_reader *pfile;
805 unsigned int count;
806{
807 if (count == 1)
808 cpp_warning (pfile, "embedded null character ignored");
809 else
810 cpp_warning (pfile, "embedded null characters ignored");
811}
812
45b966db
ZW
813/* Skip whitespace \-newline and comments. Does not macro-expand. */
814
815void
816_cpp_skip_hspace (pfile)
817 cpp_reader *pfile;
818{
64aaf407 819 unsigned int null_count = 0;
45b966db 820 int c;
64aaf407 821
45b966db
ZW
822 while (1)
823 {
824 c = GETC();
825 if (c == EOF)
64aaf407 826 goto out;
45b966db
ZW
827 else if (is_hspace(c))
828 {
829 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
830 cpp_pedwarn (pfile, "%s in preprocessing directive",
831 c == '\f' ? "formfeed" : "vertical tab");
64aaf407
NB
832 else if (c == '\0')
833 null_count++;
45b966db
ZW
834 }
835 else if (c == '\r')
836 {
837 /* \r is a backslash-newline marker if !has_escapes, and
838 a deletable-whitespace or no-reexpansion marker otherwise. */
839 if (CPP_BUFFER (pfile)->has_escapes)
840 {
841 if (PEEKC() == ' ')
842 FORWARD(1);
843 else
844 break;
845 }
846 else
847 CPP_BUMP_LINE (pfile);
848 }
849 else if (c == '/' || c == '-')
850 {
851 c = skip_comment (pfile, c);
852 if (c != ' ')
853 break;
854 }
855 else
856 break;
857 }
858 FORWARD(-1);
64aaf407
NB
859 out:
860 if (null_count)
861 null_warning (pfile, null_count);
45b966db
ZW
862}
863
864/* Read and discard the rest of the current line. */
865
866void
867_cpp_skip_rest_of_line (pfile)
868 cpp_reader *pfile;
869{
870 for (;;)
871 {
872 int c = GETC();
873 switch (c)
874 {
875 case '\n':
876 FORWARD(-1);
877 case EOF:
878 return;
879
880 case '\r':
881 if (! CPP_BUFFER (pfile)->has_escapes)
882 CPP_BUMP_LINE (pfile);
883 break;
884
885 case '\'':
886 case '\"':
887 skip_string (pfile, c);
888 break;
889
890 case '/':
891 case '-':
892 skip_comment (pfile, c);
893 break;
894
895 case '\f':
896 case '\v':
897 if (CPP_PEDANTIC (pfile))
898 cpp_pedwarn (pfile, "%s in preprocessing directive",
899 c == '\f' ? "formfeed" : "vertical tab");
900 break;
901
902 }
903 }
904}
905
906/* Parse an identifier starting with C. */
907
908void
909_cpp_parse_name (pfile, c)
910 cpp_reader *pfile;
911 int c;
912{
913 for (;;)
914 {
915 if (! is_idchar(c))
916 {
917 FORWARD (-1);
918 break;
919 }
920
921 if (c == '$' && CPP_PEDANTIC (pfile))
922 cpp_pedwarn (pfile, "`$' in identifier");
923
924 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
925 CPP_PUTC_Q (pfile, c);
926 c = GETC();
927 if (c == EOF)
928 break;
929 }
45b966db
ZW
930 return;
931}
932
933/* Parse and skip over a string starting with C. A single quoted
934 string is treated like a double -- some programs (e.g., troff) are
935 perverse this way. (However, a single quoted string is not allowed
936 to extend over multiple lines.) */
937static void
938skip_string (pfile, c)
939 cpp_reader *pfile;
940 int c;
941{
3a2b2c7a 942 unsigned int start_line, start_column;
64aaf407 943 unsigned int null_count = 0;
45b966db 944
3a2b2c7a
ZW
945 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
946 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
45b966db
ZW
947 while (1)
948 {
949 int cc = GETC();
950 switch (cc)
951 {
952 case EOF:
953 cpp_error_with_line (pfile, start_line, start_column,
954 "unterminated string or character constant");
955 if (pfile->multiline_string_line != start_line
956 && pfile->multiline_string_line != 0)
957 cpp_error_with_line (pfile,
958 pfile->multiline_string_line, -1,
959 "possible real start of unterminated constant");
960 pfile->multiline_string_line = 0;
64aaf407 961 goto out;
45b966db 962
64aaf407
NB
963 case '\0':
964 null_count++;
965 break;
966
45b966db
ZW
967 case '\n':
968 CPP_BUMP_LINE (pfile);
969 /* In Fortran and assembly language, silently terminate
970 strings of either variety at end of line. This is a
971 kludge around not knowing where comments are in these
972 languages. */
ae79697b
ZW
973 if (CPP_OPTION (pfile, lang_fortran)
974 || CPP_OPTION (pfile, lang_asm))
45b966db
ZW
975 {
976 FORWARD(-1);
64aaf407 977 goto out;
45b966db
ZW
978 }
979 /* Character constants may not extend over multiple lines.
980 In Standard C, neither may strings. We accept multiline
981 strings as an extension. */
982 if (c == '\'')
983 {
984 cpp_error_with_line (pfile, start_line, start_column,
985 "unterminated character constant");
986 FORWARD(-1);
64aaf407 987 goto out;
45b966db
ZW
988 }
989 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
990 cpp_pedwarn_with_line (pfile, start_line, start_column,
991 "string constant runs past end of line");
992 if (pfile->multiline_string_line == 0)
993 pfile->multiline_string_line = start_line;
994 break;
995
996 case '\r':
997 if (CPP_BUFFER (pfile)->has_escapes)
998 {
999 cpp_ice (pfile, "\\r escape inside string constant");
1000 FORWARD(1);
1001 }
1002 else
1003 /* Backslash newline is replaced by nothing at all. */
1004 CPP_BUMP_LINE (pfile);
1005 break;
1006
1007 case '\\':
1008 FORWARD(1);
1009 break;
1010
1011 case '\"':
1012 case '\'':
1013 if (cc == c)
64aaf407 1014 goto out;
45b966db
ZW
1015 break;
1016 }
1017 }
64aaf407
NB
1018
1019 out:
1020 if (null_count == 1)
1021 cpp_warning (pfile, "null character in string or character constant");
1022 else if (null_count > 1)
1023 cpp_warning (pfile, "null characters in string or character constant");
45b966db
ZW
1024}
1025
1026/* Parse a string and copy it to the output. */
1027
1028static void
1029parse_string (pfile, c)
1030 cpp_reader *pfile;
1031 int c;
1032{
1033 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
1034 const U_CHAR *limit;
1035
1036 skip_string (pfile, c);
1037
1038 limit = CPP_BUFFER (pfile)->cur;
1039 CPP_RESERVE (pfile, limit - start + 2);
1040 CPP_PUTC_Q (pfile, c);
1041 for (; start < limit; start++)
1042 if (*start != '\r')
1043 CPP_PUTC_Q (pfile, *start);
1044}
1045
1046/* Read an assertion into the token buffer, converting to
1047 canonical form: `#predicate(a n swe r)' The next non-whitespace
1048 character to read should be the first letter of the predicate.
1049 Returns 0 for syntax error, 1 for bare predicate, 2 for predicate
1050 with answer (see callers for why). In case of 0, an error has been
1051 printed. */
1052int
1053_cpp_parse_assertion (pfile)
1054 cpp_reader *pfile;
1055{
1056 int c, dropwhite;
1057 _cpp_skip_hspace (pfile);
1058 c = PEEKC();
bfab56e7
ZW
1059 if (c == '\n')
1060 {
1061 cpp_error (pfile, "assertion without predicate");
1062 return 0;
1063 }
1064 else if (! is_idstart(c))
45b966db
ZW
1065 {
1066 cpp_error (pfile, "assertion predicate is not an identifier");
1067 return 0;
1068 }
1069 CPP_PUTC(pfile, '#');
1070 FORWARD(1);
1071 _cpp_parse_name (pfile, c);
1072
1073 c = PEEKC();
1074 if (c != '(')
1075 {
1076 if (is_hspace(c) || c == '\r')
1077 _cpp_skip_hspace (pfile);
1078 c = PEEKC();
1079 }
1080 if (c != '(')
1081 return 1;
1082
1083 CPP_PUTC(pfile, '(');
1084 FORWARD(1);
1085 dropwhite = 1;
1086 while ((c = GETC()) != ')')
1087 {
1088 if (is_space(c))
1089 {
1090 if (! dropwhite)
1091 {
1092 CPP_PUTC(pfile, ' ');
1093 dropwhite = 1;
1094 }
1095 }
1096 else if (c == '\n' || c == EOF)
1097 {
1098 if (c == '\n') FORWARD(-1);
1099 cpp_error (pfile, "un-terminated assertion answer");
1100 return 0;
1101 }
1102 else if (c == '\r')
1103 /* \r cannot be a macro escape here. */
1104 CPP_BUMP_LINE (pfile);
1105 else
1106 {
1107 CPP_PUTC (pfile, c);
1108 dropwhite = 0;
1109 }
1110 }
1111
1112 if (pfile->limit[-1] == ' ')
1113 pfile->limit[-1] = ')';
1114 else if (pfile->limit[-1] == '(')
1115 {
1116 cpp_error (pfile, "empty token sequence in assertion");
1117 return 0;
1118 }
1119 else
1120 CPP_PUTC (pfile, ')');
1121
45b966db
ZW
1122 return 2;
1123}
1124
1125/* Get the next token, and add it to the text in pfile->token_buffer.
1126 Return the kind of token we got. */
1127
3a2b2c7a 1128enum cpp_ttype
45b966db
ZW
1129_cpp_lex_token (pfile)
1130 cpp_reader *pfile;
1131{
5eec0563 1132 register int c, c2;
3a2b2c7a 1133 enum cpp_ttype token;
45b966db 1134
f2d5f0cc
ZW
1135 if (CPP_BUFFER (pfile) == NULL)
1136 return CPP_EOF;
1137
45b966db
ZW
1138 get_next:
1139 c = GETC();
1140 switch (c)
1141 {
1142 case EOF:
1143 return CPP_EOF;
1144
1145 case '/':
1146 if (PEEKC () == '=')
1147 goto op2;
1148
1149 comment:
ae79697b 1150 if (CPP_OPTION (pfile, discard_comments))
45b966db
ZW
1151 c = skip_comment (pfile, c);
1152 else
1153 c = copy_comment (pfile, c);
1154 if (c != ' ')
1155 goto randomchar;
1156
1157 /* Comments are equivalent to spaces.
1158 For -traditional, a comment is equivalent to nothing. */
ff2b53ef 1159 if (!CPP_OPTION (pfile, discard_comments))
45b966db 1160 return CPP_COMMENT;
9e62c811 1161 else if (CPP_TRADITIONAL (pfile))
0f89df67
ZW
1162 {
1163 if (pfile->parsing_define_directive)
1164 return CPP_COMMENT;
1165 goto get_next;
1166 }
45b966db
ZW
1167 else
1168 {
1169 CPP_PUTC (pfile, c);
1170 return CPP_HSPACE;
1171 }
1172
1173 case '#':
5eec0563
JM
1174 CPP_PUTC (pfile, c);
1175
1176 hash:
45b966db
ZW
1177 if (pfile->parsing_if_directive)
1178 {
f2d5f0cc 1179 CPP_ADJUST_WRITTEN (pfile, -1);
bfab56e7
ZW
1180 if (_cpp_parse_assertion (pfile))
1181 return CPP_ASSERTION;
5eec0563 1182 return CPP_OTHER;
45b966db
ZW
1183 }
1184
9e62c811 1185 if (pfile->parsing_define_directive)
45b966db 1186 {
5eec0563
JM
1187 c2 = PEEKC ();
1188 if (c2 == '#')
1189 {
1190 FORWARD (1);
1191 CPP_PUTC (pfile, c2);
1192 }
1193 else if (c2 == '%' && PEEKN (1) == ':')
1194 {
1195 /* Digraph: "%:" == "#". */
1196 FORWARD (1);
1197 CPP_RESERVE (pfile, 2);
1198 CPP_PUTC_Q (pfile, c2);
1199 CPP_PUTC_Q (pfile, GETC ());
1200 }
1201 else
1368ee70 1202 return CPP_HASH;
5eec0563 1203
1368ee70 1204 return CPP_PASTE;
45b966db
ZW
1205 }
1206
1207 if (!pfile->only_seen_white)
5eec0563
JM
1208 return CPP_OTHER;
1209
1210 /* Remove the "#" or "%:" from the token buffer. */
1211 CPP_ADJUST_WRITTEN (pfile, (c == '#' ? -1 : -2));
45b966db
ZW
1212 return CPP_DIRECTIVE;
1213
1214 case '\"':
1215 case '\'':
1216 parse_string (pfile, c);
45b966db
ZW
1217 return c == '\'' ? CPP_CHAR : CPP_STRING;
1218
1219 case '$':
ae79697b 1220 if (!CPP_OPTION (pfile, dollars_in_ident))
45b966db
ZW
1221 goto randomchar;
1222 goto letter;
1223
1224 case ':':
5eec0563
JM
1225 c2 = PEEKC ();
1226 /* Digraph: ":>" == "]". */
1227 if (c2 == '>'
1228 || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
45b966db
ZW
1229 goto op2;
1230 goto randomchar;
1231
1232 case '&':
1233 case '+':
1234 case '|':
1235 c2 = PEEKC ();
1236 if (c2 == c || c2 == '=')
1237 goto op2;
1238 goto randomchar;
1239
5eec0563
JM
1240 case '%':
1241 /* Digraphs: "%:" == "#", "%>" == "}". */
1242 c2 = PEEKC ();
1243 if (c2 == ':')
1244 {
1245 FORWARD (1);
1246 CPP_RESERVE (pfile, 2);
1247 CPP_PUTC_Q (pfile, c);
1248 CPP_PUTC_Q (pfile, c2);
1249 goto hash;
1250 }
1251 else if (c2 == '>')
1252 {
1253 FORWARD (1);
1254 CPP_RESERVE (pfile, 2);
1255 CPP_PUTC_Q (pfile, c);
1256 CPP_PUTC_Q (pfile, c2);
1368ee70 1257 return CPP_OPEN_BRACE;
5eec0563
JM
1258 }
1259 /* else fall through */
1260
45b966db
ZW
1261 case '*':
1262 case '!':
45b966db
ZW
1263 case '=':
1264 case '^':
1265 if (PEEKC () == '=')
1266 goto op2;
1267 goto randomchar;
1268
1269 case '-':
1270 c2 = PEEKC ();
1271 if (c2 == '-')
1272 {
ae79697b 1273 if (CPP_OPTION (pfile, chill))
45b966db
ZW
1274 goto comment; /* Chill style comment */
1275 else
1276 goto op2;
1277 }
1278 else if (c2 == '=')
1279 goto op2;
1280 else if (c2 == '>')
1281 {
ae79697b 1282 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
45b966db
ZW
1283 {
1284 /* In C++, there's a ->* operator. */
1285 token = CPP_OTHER;
45b966db
ZW
1286 CPP_RESERVE (pfile, 4);
1287 CPP_PUTC_Q (pfile, c);
1288 CPP_PUTC_Q (pfile, GETC ());
1289 CPP_PUTC_Q (pfile, GETC ());
45b966db
ZW
1290 return token;
1291 }
1292 goto op2;
1293 }
1294 goto randomchar;
1295
1296 case '<':
1297 if (pfile->parsing_include_directive)
1298 {
1299 for (;;)
1300 {
1301 CPP_PUTC (pfile, c);
1302 if (c == '>')
1303 break;
1304 c = GETC ();
1305 if (c == '\n' || c == EOF)
1306 {
1307 cpp_error (pfile,
1308 "missing '>' in `#include <FILENAME>'");
1309 break;
1310 }
1311 else if (c == '\r')
1312 {
1313 if (!CPP_BUFFER (pfile)->has_escapes)
1314 {
1315 /* Backslash newline is replaced by nothing. */
1316 CPP_ADJUST_WRITTEN (pfile, -1);
1317 CPP_BUMP_LINE (pfile);
1318 }
1319 else
1320 {
1321 /* We might conceivably get \r- or \r<space> in
1322 here. Just delete 'em. */
1323 int d = GETC();
1324 if (d != '-' && d != ' ')
1325 cpp_ice (pfile, "unrecognized escape \\r%c", d);
1326 CPP_ADJUST_WRITTEN (pfile, -1);
1327 }
1328 }
1329 }
1330 return CPP_STRING;
1331 }
5eec0563
JM
1332 /* Digraphs: "<%" == "{", "<:" == "[". */
1333 c2 = PEEKC ();
1334 if (c2 == '%')
1335 {
1336 FORWARD (1);
1337 CPP_RESERVE (pfile, 2);
1338 CPP_PUTC_Q (pfile, c);
1339 CPP_PUTC_Q (pfile, c2);
1368ee70 1340 return CPP_CLOSE_BRACE;
5eec0563
JM
1341 }
1342 else if (c2 == ':')
1343 goto op2;
45b966db
ZW
1344 /* else fall through */
1345 case '>':
1346 c2 = PEEKC ();
1347 if (c2 == '=')
1348 goto op2;
1349 /* GNU C++ supports MIN and MAX operators <? and >?. */
ae79697b 1350 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
45b966db
ZW
1351 goto randomchar;
1352 FORWARD(1);
5eec0563
JM
1353 CPP_RESERVE (pfile, 3);
1354 CPP_PUTC_Q (pfile, c);
1355 CPP_PUTC_Q (pfile, c2);
1356 if (PEEKC () == '=')
45b966db 1357 CPP_PUTC_Q (pfile, GETC ());
45b966db
ZW
1358 return CPP_OTHER;
1359
1360 case '.':
1361 c2 = PEEKC ();
5eec0563 1362 if (ISDIGIT (c2))
45b966db 1363 {
5eec0563 1364 CPP_PUTC (pfile, c);
45b966db
ZW
1365 c = GETC ();
1366 goto number;
1367 }
1368
1369 /* In C++ there's a .* operator. */
ae79697b 1370 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
45b966db
ZW
1371 goto op2;
1372
1373 if (c2 == '.' && PEEKN(1) == '.')
1374 {
5eec0563 1375 CPP_RESERVE (pfile, 3);
45b966db
ZW
1376 CPP_PUTC_Q (pfile, '.');
1377 CPP_PUTC_Q (pfile, '.');
1378 CPP_PUTC_Q (pfile, '.');
1379 FORWARD (2);
1368ee70 1380 return CPP_ELLIPSIS;
45b966db
ZW
1381 }
1382 goto randomchar;
1383
1384 op2:
5eec0563 1385 CPP_RESERVE (pfile, 2);
45b966db
ZW
1386 CPP_PUTC_Q (pfile, c);
1387 CPP_PUTC_Q (pfile, GETC ());
5eec0563 1388 return CPP_OTHER;
45b966db
ZW
1389
1390 case 'L':
1391 c2 = PEEKC ();
1392 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1393 {
1394 CPP_PUTC (pfile, c);
1395 c = GETC ();
1396 parse_string (pfile, c);
45b966db
ZW
1397 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1398 }
1399 goto letter;
1400
1401 case '0': case '1': case '2': case '3': case '4':
1402 case '5': case '6': case '7': case '8': case '9':
1403 number:
1404 c2 = '.';
1405 for (;;)
1406 {
1407 CPP_RESERVE (pfile, 2);
1408 CPP_PUTC_Q (pfile, c);
1409 c = PEEKC ();
1410 if (c == EOF)
1411 break;
1412 if (!is_numchar(c) && c != '.'
1413 && ((c2 != 'e' && c2 != 'E'
1414 && ((c2 != 'p' && c2 != 'P')
ae79697b 1415 || CPP_OPTION (pfile, c89)))
45b966db
ZW
1416 || (c != '+' && c != '-')))
1417 break;
1418 FORWARD(1);
1419 c2= c;
1420 }
45b966db
ZW
1421 return CPP_NUMBER;
1422 case 'b': case 'c': case 'd': case 'h': case 'o':
1423 case 'B': case 'C': case 'D': case 'H': case 'O':
ae79697b 1424 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
45b966db 1425 {
45b966db
ZW
1426 CPP_RESERVE (pfile, 2);
1427 CPP_PUTC_Q (pfile, c);
1428 CPP_PUTC_Q (pfile, '\'');
1429 FORWARD(1);
1430 for (;;)
1431 {
1432 c = GETC();
1433 if (c == EOF)
1434 goto chill_number_eof;
1435 if (!is_numchar(c))
1436 break;
1437 CPP_PUTC (pfile, c);
1438 }
1439 if (c == '\'')
1440 {
1441 CPP_RESERVE (pfile, 2);
1442 CPP_PUTC_Q (pfile, c);
45b966db
ZW
1443 return CPP_STRING;
1444 }
1445 else
1446 {
1447 FORWARD(-1);
1448 chill_number_eof:
45b966db
ZW
1449 return CPP_NUMBER;
1450 }
1451 }
1452 else
1453 goto letter;
1454 case '_':
1455 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1456 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1457 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1458 case 'x': case 'y': case 'z':
1459 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1460 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1461 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1462 case 'Y': case 'Z':
1463 letter:
45b966db
ZW
1464 _cpp_parse_name (pfile, c);
1465 return CPP_MACRO;
1466
64aaf407
NB
1467 case ' ': case '\t': case '\v': case '\f': case '\0':
1468 {
1469 int null_count = 0;
1470
1471 for (;;)
1472 {
1473 if (c == '\0')
1474 null_count++;
1475 else
1476 CPP_PUTC (pfile, c);
1477 c = PEEKC ();
1478 if (c == EOF || !is_hspace(c))
1479 break;
1480 FORWARD(1);
1481 }
1482 if (null_count)
1483 null_warning (pfile, null_count);
1484 return CPP_HSPACE;
1485 }
45b966db
ZW
1486
1487 case '\r':
1488 if (CPP_BUFFER (pfile)->has_escapes)
1489 {
1490 c = GETC ();
1491 if (c == '-')
1492 {
1493 if (pfile->output_escapes)
1494 CPP_PUTS (pfile, "\r-", 2);
1495 _cpp_parse_name (pfile, GETC ());
1496 return CPP_NAME;
1497 }
1498 else if (c == ' ')
1499 {
ff2b53ef
ZW
1500 /* "\r " means a space, but only if necessary to prevent
1501 accidental token concatenation. */
45b966db
ZW
1502 CPP_RESERVE (pfile, 2);
1503 if (pfile->output_escapes)
1504 CPP_PUTC_Q (pfile, '\r');
1505 CPP_PUTC_Q (pfile, c);
1506 return CPP_HSPACE;
1507 }
1508 else
1509 {
1510 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1511 goto get_next;
1512 }
1513 }
1514 else
1515 {
1516 /* Backslash newline is ignored. */
cbccf5e8
MM
1517 if (!ACTIVE_MARK_P (pfile))
1518 CPP_BUMP_LINE (pfile);
45b966db
ZW
1519 goto get_next;
1520 }
1521
1522 case '\n':
1523 CPP_PUTC (pfile, c);
45b966db
ZW
1524 return CPP_VSPACE;
1525
1368ee70
ZW
1526 case '(': token = CPP_OPEN_PAREN; goto char1;
1527 case ')': token = CPP_CLOSE_PAREN; goto char1;
1528 case '{': token = CPP_OPEN_BRACE; goto char1;
1529 case '}': token = CPP_CLOSE_BRACE; goto char1;
1530 case ',': token = CPP_COMMA; goto char1;
1531 case ';': token = CPP_SEMICOLON; goto char1;
45b966db
ZW
1532
1533 randomchar:
1534 default:
1535 token = CPP_OTHER;
1536 char1:
45b966db
ZW
1537 CPP_PUTC (pfile, c);
1538 return token;
1539 }
1540}
1541
1542/* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1543 Caller is expected to have checked no_macro_expand. */
1544static int
1545maybe_macroexpand (pfile, written)
1546 cpp_reader *pfile;
1547 long written;
1548{
1549 U_CHAR *macro = pfile->token_buffer + written;
1550 size_t len = CPP_WRITTEN (pfile) - written;
1551 HASHNODE *hp = _cpp_lookup (pfile, macro, len);
1552
a7abcbbf
ZW
1553 /* _cpp_lookup never returns null. */
1554 if (hp->type == T_VOID)
45b966db 1555 return 0;
d9e0bd53 1556 if (hp->disabled || hp->type == T_IDENTITY)
45b966db
ZW
1557 {
1558 if (pfile->output_escapes)
1559 {
1560 /* Insert a no-reexpand marker before IDENT. */
1561 CPP_RESERVE (pfile, 2);
1562 CPP_ADJUST_WRITTEN (pfile, 2);
1563 macro = pfile->token_buffer + written;
1564
1565 memmove (macro + 2, macro, len);
1566 macro[0] = '\r';
1567 macro[1] = '-';
1568 }
1569 return 0;
1570 }
ff2b53ef
ZW
1571 if (hp->type == T_EMPTY)
1572 {
1573 /* Special case optimization: macro expands to nothing. */
1574 CPP_SET_WRITTEN (pfile, written);
1575 CPP_PUTC_Q (pfile, ' ');
1576 return 1;
1577 }
45b966db
ZW
1578
1579 /* If macro wants an arglist, verify that a '(' follows. */
d9e0bd53 1580 if (hp->type == T_FMACRO)
45b966db
ZW
1581 {
1582 int macbuf_whitespace = 0;
1583 int c;
1584
1585 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1586 {
1587 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1588 for (;;)
1589 {
1590 _cpp_skip_hspace (pfile);
1591 c = PEEKC ();
1592 if (c == '\n')
1593 FORWARD(1);
1594 else
1595 break;
1596 }
1597 if (point != CPP_BUFFER (pfile)->cur)
1598 macbuf_whitespace = 1;
1599 if (c == '(')
1600 goto is_macro_call;
1601 else if (c != EOF)
1602 goto not_macro_call;
1603 cpp_pop_buffer (pfile);
1604 }
1605
1606 CPP_SET_MARK (pfile);
1607 for (;;)
1608 {
1609 _cpp_skip_hspace (pfile);
1610 c = PEEKC ();
1611 if (c == '\n')
1612 FORWARD(1);
1613 else
1614 break;
1615 }
1616 CPP_GOTO_MARK (pfile);
1617
1618 if (c != '(')
1619 {
1620 not_macro_call:
1621 if (macbuf_whitespace)
1622 CPP_PUTC (pfile, ' ');
1623 return 0;
1624 }
1625 }
1626
1627 is_macro_call:
1628 /* This is now known to be a macro call.
1629 Expand the macro, reading arguments as needed,
1630 and push the expansion on the input stack. */
1631 _cpp_macroexpand (pfile, hp);
1632 CPP_SET_WRITTEN (pfile, written);
1633 return 1;
1634}
1635
9e62c811
ZW
1636/* Complain about \v or \f in a preprocessing directive (constraint
1637 violation, C99 6.10 para 5). Caller has checked CPP_PEDANTIC. */
1638static void
1639pedantic_whitespace (pfile, p, len)
1640 cpp_reader *pfile;
1641 U_CHAR *p;
1642 unsigned int len;
1643{
1644 while (len)
1645 {
1646 if (*p == '\v')
1647 cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1648 else if (*p == '\f')
1649 cpp_pedwarn (pfile, "form feed in preprocessing directive");
1650 p++;
1651 len--;
1652 }
1653}
1654
1655
3a2b2c7a 1656enum cpp_ttype
45b966db
ZW
1657cpp_get_token (pfile)
1658 cpp_reader *pfile;
1659{
3a2b2c7a 1660 enum cpp_ttype token;
45b966db
ZW
1661 long written = CPP_WRITTEN (pfile);
1662
1663 get_next:
1664 token = _cpp_lex_token (pfile);
1665
1666 switch (token)
1667 {
1668 default:
ff2b53ef
ZW
1669 pfile->potential_control_macro = 0;
1670 pfile->only_seen_white = 0;
1671 return token;
1672
1673 case CPP_VSPACE:
1674 if (pfile->only_seen_white == 0)
1675 pfile->only_seen_white = 1;
1676 CPP_BUMP_LINE (pfile);
ff2b53ef
ZW
1677 return token;
1678
1679 case CPP_HSPACE:
1680 case CPP_COMMENT:
45b966db
ZW
1681 return token;
1682
1683 case CPP_DIRECTIVE:
ff2b53ef 1684 pfile->potential_control_macro = 0;
45b966db
ZW
1685 if (_cpp_handle_directive (pfile))
1686 return CPP_DIRECTIVE;
1687 pfile->only_seen_white = 0;
1688 CPP_PUTC (pfile, '#');
1689 return CPP_OTHER;
1690
1691 case CPP_MACRO:
ff2b53ef
ZW
1692 pfile->potential_control_macro = 0;
1693 pfile->only_seen_white = 0;
45b966db
ZW
1694 if (! pfile->no_macro_expand
1695 && maybe_macroexpand (pfile, written))
1696 goto get_next;
1697 return CPP_NAME;
1698
1699 case CPP_EOF:
f2d5f0cc
ZW
1700 if (CPP_BUFFER (pfile) == NULL)
1701 return CPP_EOF;
c56c2073 1702 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
f2d5f0cc 1703 {
45b966db
ZW
1704 cpp_pop_buffer (pfile);
1705 goto get_next;
1706 }
c56c2073
ZW
1707 cpp_pop_buffer (pfile);
1708 return CPP_EOF;
45b966db
ZW
1709 }
1710}
1711
1712/* Like cpp_get_token, but skip spaces and comments. */
1713
3a2b2c7a 1714enum cpp_ttype
45b966db
ZW
1715cpp_get_non_space_token (pfile)
1716 cpp_reader *pfile;
1717{
1718 int old_written = CPP_WRITTEN (pfile);
1719 for (;;)
1720 {
3a2b2c7a 1721 enum cpp_ttype token = cpp_get_token (pfile);
ff2b53ef 1722 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
45b966db
ZW
1723 return token;
1724 CPP_SET_WRITTEN (pfile, old_written);
1725 }
1726}
1727
ff2b53ef 1728/* Like cpp_get_token, except that it does not execute directives,
c56c2073 1729 does not consume vertical space, and discards horizontal space. */
3a2b2c7a 1730enum cpp_ttype
9e62c811 1731_cpp_get_directive_token (pfile)
45b966db
ZW
1732 cpp_reader *pfile;
1733{
ff2b53ef 1734 long old_written;
3a2b2c7a 1735 enum cpp_ttype token;
57c578a6 1736 int at_bol;
45b966db 1737
ff2b53ef 1738 get_next:
57c578a6 1739 at_bol = (CPP_BUFFER (pfile)->cur == CPP_BUFFER (pfile)->line_base);
ff2b53ef
ZW
1740 old_written = CPP_WRITTEN (pfile);
1741 token = _cpp_lex_token (pfile);
1742 switch (token)
45b966db 1743 {
ff2b53ef
ZW
1744 default:
1745 return token;
45b966db 1746
ff2b53ef
ZW
1747 case CPP_VSPACE:
1748 /* Put it back and return VSPACE. */
1749 FORWARD(-1);
1750 CPP_ADJUST_WRITTEN (pfile, -1);
1751 return CPP_VSPACE;
45b966db 1752
ff2b53ef 1753 case CPP_HSPACE:
57c578a6
ZW
1754 /* The purpose of this rather strange check is to prevent pedantic
1755 warnings for ^L in an #ifdefed out block. */
1756 if (CPP_PEDANTIC (pfile) && ! at_bol)
9e62c811
ZW
1757 pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1758 CPP_WRITTEN (pfile) - old_written);
1759 CPP_SET_WRITTEN (pfile, old_written);
1760 goto get_next;
ff2b53ef 1761 return CPP_HSPACE;
45b966db 1762
ff2b53ef
ZW
1763 case CPP_DIRECTIVE:
1764 /* Don't execute the directive, but don't smash it to OTHER either. */
1765 CPP_PUTC (pfile, '#');
1766 return CPP_DIRECTIVE;
1767
1768 case CPP_MACRO:
1769 if (! pfile->no_macro_expand
1770 && maybe_macroexpand (pfile, old_written))
1771 goto get_next;
1772 return CPP_NAME;
45b966db 1773
ff2b53ef
ZW
1774 case CPP_EOF:
1775 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
45b966db 1776 {
ff2b53ef
ZW
1777 cpp_pop_buffer (pfile);
1778 goto get_next;
45b966db 1779 }
ff2b53ef
ZW
1780 else
1781 /* This can happen for files that don't end with a newline,
1782 and for cpp_define and friends. Pretend they do, so
1783 callers don't have to deal. A warning will be issued by
1784 someone else, if necessary. */
1785 return CPP_VSPACE;
1786 }
1787}
1788
45b966db
ZW
1789/* Determine the current line and column. Used only by read_and_prescan. */
1790static U_CHAR *
1791find_position (start, limit, linep)
1792 U_CHAR *start;
1793 U_CHAR *limit;
1794 unsigned long *linep;
1795{
1796 unsigned long line = *linep;
1797 U_CHAR *lbase = start;
1798 while (start < limit)
1799 {
1800 U_CHAR ch = *start++;
1801 if (ch == '\n' || ch == '\r')
1802 {
1803 line++;
1804 lbase = start;
1805 }
1806 }
1807 *linep = line;
1808 return lbase;
1809}
1810
2a87fbe8
ZW
1811/* The following table is used by _cpp_read_and_prescan. If we have
1812 designated initializers, it can be constant data; otherwise, it is
1813 set up at runtime by _cpp_init_input_buffer. */
46d07497
ZW
1814
1815#ifndef UCHAR_MAX
1816#define UCHAR_MAX 255 /* assume 8-bit bytes */
1817#endif
1818
/* The table contents are written once, further down, as
   "CHARTAB s(index, value) ... END"; the three helper macros decide
   what that text turns into.  When GCC_VERSION >= 2007, CHARTAB opens
   a designated-initializer list for a const array, each s(p, v)
   contributes one "[p] = v," entry, END closes the list, and
   init_chartab() expands to nothing.  */
12cf91fe 1819#if (GCC_VERSION >= 2007)
2a87fbe8 1820#define init_chartab() /* nothing */
12cf91fe 1821#define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
46d07497
ZW
1822#define END };
1823#define s(p, v) [p] = v,
1824#else
/* Otherwise CHARTAB defines a zero-initialized writable array and
   opens the body of a function init_chartab, inside which each
   s(p, v) is the assignment "x[p] = v;" and END is the closing
   brace of that function.  */
12cf91fe 1825#define CHARTAB static U_CHAR chartab[UCHAR_MAX + 1] = { 0 }; \
2a87fbe8
ZW
1826 static void init_chartab PARAMS ((void)) { \
1827 unsigned char *x = chartab;
46d07497
ZW
1828#define END }
1829#define s(p, v) x[p] = v;
1830#endif
1831
1832/* Table of characters that can't be handled in the inner loop.
2a87fbe8
ZW
1833 Also contains the mapping between trigraph third characters and their
1834 replacements. */
46d07497
ZW
1835#define SPECCASE_CR 1
1836#define SPECCASE_BACKSLASH 2
1837#define SPECCASE_QUESTION 3
1838
2a87fbe8 1839CHARTAB
46d07497
ZW
1840 s('\r', SPECCASE_CR)
1841 s('\\', SPECCASE_BACKSLASH)
1842 s('?', SPECCASE_QUESTION)
46d07497 1843
46d07497
ZW
1844 s('=', '#') s(')', ']') s('!', '|')
1845 s('(', '[') s('\'', '^') s('>', '}')
1846 s('/', '\\') s('<', '{') s('-', '~')
1847END
1848
1849#undef CHARTAB
46d07497
ZW
1850#undef END
1851#undef s
1852
/* NORMAL(c) is true when character C is not one of the three SPECCASE_
   characters: its table entry is either 0 or a trigraph replacement
   character, all of which compare greater than SPECCASE_QUESTION.
   NONTRI(c) takes a value already fetched from chartab and is true
   when that value is not a trigraph replacement.  */
2a87fbe8
ZW
1853#define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
1854#define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1855
45b966db
ZW
1856/* Read the entire contents of file DESC into buffer BUF. LEN is how
1857 much memory to allocate initially; more will be allocated if
1858 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1859 canonical form (\n). If enabled, convert and/or warn about
1860 trigraphs. Convert backslash-newline to a one-character escape
1861 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1862 token). If there is no newline at the end of the file, add one and
1863 warn. Returns -1 on failure, or the actual length of the data to
1864 be scanned.
1865
1866 This function does a lot of work, and can be a serious performance
1867 bottleneck. It has been tuned heavily; make sure you understand it
1868 before hacking. The common case - no trigraphs, Unix style line
1869 breaks, backslash-newline set off by whitespace, newline at EOF -
1870 has been optimized at the expense of the others. The performance
1871 penalty for DOS style line breaks (\r\n) is about 15%.
1872
1873 Warnings lose particularly heavily since we have to determine the
1874 line number, which involves scanning from the beginning of the file
1875 or from the last warning. The penalty for the absence of a newline
1876 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1877
1878 If your file has more than one kind of end-of-line marker, you
04e3ec78
NB
1879 will get messed-up line numbering.
1880
1881 So that the cases of the switch statement do not have to concern
1882 themselves with the complications of reading beyond the end of the
1883 buffer, the buffer is guaranteed to have at least 3 characters in
1884 it (or however many are left in the file, if less) on entry to the
1885 switch. This is enough to handle trigraphs and the "\\\n\r" and
1886 "\\\r\n" cases.
1887
1888 The end of the buffer is marked by a '\\', which, being a special
1889 character, guarantees we will exit the fast-scan loops and perform
1890 a refill. */
46d07497 1891
45b966db
ZW
1892long
1893_cpp_read_and_prescan (pfile, fp, desc, len)
1894 cpp_reader *pfile;
1895 cpp_buffer *fp;
1896 int desc;
1897 size_t len;
1898{
1899 U_CHAR *buf = (U_CHAR *) xmalloc (len);
1900 U_CHAR *ip, *op, *line_base;
1901 U_CHAR *ibase;
45b966db
ZW
1902 unsigned long line;
1903 unsigned int deferred_newlines;
45b966db 1904 size_t offset;
04e3ec78 1905 int count = 0;
45b966db
ZW
1906
1907 offset = 0;
04e3ec78 1908 deferred_newlines = 0;
45b966db
ZW
1909 op = buf;
1910 line_base = buf;
1911 line = 1;
04e3ec78
NB
1912 ibase = pfile->input_buffer + 3;
1913 ip = ibase;
1914 ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
45b966db
ZW
1915
1916 for (;;)
1917 {
04e3ec78
NB
1918 U_CHAR *near_buff_end;
1919
04e3ec78 1920 count = read (desc, ibase, pfile->input_buffer_len);
45b966db
ZW
1921 if (count < 0)
1922 goto error;
04e3ec78
NB
1923
1924 ibase[count] = '\\'; /* Marks end of buffer */
1925 if (count)
45b966db 1926 {
04e3ec78
NB
1927 near_buff_end = pfile->input_buffer + count;
1928 offset += count;
45b966db 1929 if (offset > len)
04e3ec78
NB
1930 {
1931 size_t delta_op;
1932 size_t delta_line_base;
1b955cba 1933 len = offset * 2;
04e3ec78
NB
1934 if (offset > len)
1935 /* len overflowed.
1936 This could happen if the file is larger than half the
1937 maximum address space of the machine. */
1938 goto too_big;
1939
1940 delta_op = op - buf;
1941 delta_line_base = line_base - buf;
1942 buf = (U_CHAR *) xrealloc (buf, len);
1943 op = buf + delta_op;
1944 line_base = buf + delta_line_base;
1945 }
1946 }
1947 else
1948 {
1949 if (ip == ibase)
1950 break;
1951 /* Allow normal processing of the (at most 2) remaining
1952 characters. The end-of-buffer marker is still present
1953 and prevents false matches within the switch. */
1954 near_buff_end = ibase - 1;
45b966db
ZW
1955 }
1956
1957 for (;;)
1958 {
04e3ec78 1959 unsigned int span;
45b966db 1960
04e3ec78 1961 /* Deal with \-newline, potentially in the middle of a token. */
45b966db
ZW
1962 if (deferred_newlines)
1963 {
2a87fbe8 1964 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
04e3ec78
NB
1965 {
1966 /* Previous was not white space. Skip to white
1967 space, if we can, before outputting the \r's */
1968 span = 0;
1969 while (ip[span] != ' '
1970 && ip[span] != '\t'
1971 && ip[span] != '\n'
2a87fbe8 1972 && NORMAL(ip[span]))
04e3ec78
NB
1973 span++;
1974 memcpy (op, ip, span);
1975 op += span;
1976 ip += span;
2a87fbe8 1977 if (! NORMAL(ip[0]))
04e3ec78
NB
1978 goto do_speccase;
1979 }
1980 while (deferred_newlines)
1981 deferred_newlines--, *op++ = '\r';
45b966db
ZW
1982 }
1983
1984 /* Copy as much as we can without special treatment. */
04e3ec78 1985 span = 0;
2a87fbe8 1986 while (NORMAL (ip[span])) span++;
45b966db
ZW
1987 memcpy (op, ip, span);
1988 op += span;
1989 ip += span;
1990
04e3ec78
NB
1991 do_speccase:
1992 if (ip > near_buff_end) /* Do we have enough chars? */
1993 break;
2a87fbe8 1994 switch (chartab[*ip++])
45b966db 1995 {
45b966db 1996 case SPECCASE_CR: /* \r */
04e3ec78 1997 if (ip[-2] != '\n')
45b966db 1998 {
04e3ec78
NB
1999 if (*ip == '\n')
2000 ip++;
2001 *op++ = '\n';
45b966db 2002 }
45b966db
ZW
2003 break;
2004
2005 case SPECCASE_BACKSLASH: /* \ */
04e3ec78 2006 if (*ip == '\n')
45b966db 2007 {
04e3ec78 2008 deferred_newlines++;
45b966db
ZW
2009 ip++;
2010 if (*ip == '\r') ip++;
45b966db
ZW
2011 }
2012 else if (*ip == '\r')
2013 {
04e3ec78 2014 deferred_newlines++;
45b966db
ZW
2015 ip++;
2016 if (*ip == '\n') ip++;
45b966db
ZW
2017 }
2018 else
2019 *op++ = '\\';
04e3ec78 2020 break;
45b966db
ZW
2021
2022 case SPECCASE_QUESTION: /* ? */
2023 {
2024 unsigned int d, t;
04e3ec78
NB
2025
2026 *op++ = '?'; /* Normal non-trigraph case */
2027 if (ip[0] != '?')
2028 break;
2029
45b966db 2030 d = ip[1];
2a87fbe8
ZW
2031 t = chartab[d];
2032 if (NONTRI (t))
04e3ec78 2033 break;
45b966db 2034
ae79697b 2035 if (CPP_OPTION (pfile, warn_trigraphs))
45b966db
ZW
2036 {
2037 unsigned long col;
2038 line_base = find_position (line_base, op, &line);
2039 col = op - line_base + 1;
ae79697b 2040 if (CPP_OPTION (pfile, trigraphs))
45b966db 2041 cpp_warning_with_line (pfile, line, col,
04e3ec78 2042 "trigraph ??%c converted to %c", d, t);
45b966db
ZW
2043 else
2044 cpp_warning_with_line (pfile, line, col,
04e3ec78 2045 "trigraph ??%c ignored", d);
45b966db 2046 }
04e3ec78
NB
2047
2048 ip += 2;
ae79697b 2049 if (CPP_OPTION (pfile, trigraphs))
45b966db 2050 {
04e3ec78 2051 op[-1] = t; /* Overwrite '?' */
45b966db 2052 if (t == '\\')
04e3ec78
NB
2053 {
2054 op--;
2055 *--ip = '\\';
2056 goto do_speccase; /* May need buffer refill */
2057 }
45b966db
ZW
2058 }
2059 else
2060 {
45b966db
ZW
2061 *op++ = '?';
2062 *op++ = d;
2063 }
2064 }
04e3ec78 2065 break;
45b966db
ZW
2066 }
2067 }
f6fab919
ZW
2068 /* Copy previous char plus unprocessed (at most 2) chars
2069 to beginning of buffer, refill it with another
2070 read(), and continue processing */
2071 memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
2072 ip -= count;
45b966db
ZW
2073 }
2074
2075 if (offset == 0)
2076 return 0;
2077
45b966db
ZW
2078 if (op[-1] != '\n')
2079 {
2080 unsigned long col;
2081 line_base = find_position (line_base, op, &line);
2082 col = op - line_base + 1;
f6fab919 2083 cpp_warning_with_line (pfile, line, col, "no newline at end of file");
45b966db
ZW
2084 if (offset + 1 > len)
2085 {
2086 len += 1;
2087 if (offset + 1 > len)
2088 goto too_big;
2089 buf = (U_CHAR *) xrealloc (buf, len);
2090 op = buf + offset;
2091 }
2092 *op++ = '\n';
2093 }
2094
2095 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
2096 return op - buf;
2097
2098 too_big:
f6fab919
ZW
2099 cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
2100 (unsigned long)offset);
45b966db
ZW
2101 free (buf);
2102 return -1;
2103
2104 error:
2105 cpp_error_from_errno (pfile, fp->ihash->name);
2106 free (buf);
2107 return -1;
2108}
2109
2a87fbe8
ZW
2110/* Allocate pfile->input_buffer, and initialize chartab[]
2111 if it hasn't happened already. */
46d07497 2112
45b966db
ZW
2113void
2114_cpp_init_input_buffer (pfile)
2115 cpp_reader *pfile;
2116{
2117 U_CHAR *tmp;
2118
2a87fbe8 2119 init_chartab ();
9e62c811 2120 init_token_list (pfile, &pfile->directbuf, 0);
04e3ec78 2121
45b966db
ZW
2122 /* Determine the appropriate size for the input buffer. Normal C
2123 source files are smaller than eight K. */
04e3ec78
NB
2124 /* 8Kbytes of buffer proper, 1 to detect running off the end without
2125 address arithmetic all the time, and 3 for pushback during buffer
2126 refill, in case there's a potential trigraph or end-of-line
2127 digraph at the end of a block. */
45b966db 2128
04e3ec78 2129 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
45b966db
ZW
2130 pfile->input_buffer = tmp;
2131 pfile->input_buffer_len = 8192;
2132}
c5a04734 2133
6d2c2047
ZW
2134/* Utility routine:
2135 Compares, in the manner of strcmp(3), the token beginning at TOKEN
2136 and extending for LEN characters to the NUL-terminated string
2137 STRING. Typical usage:
2138
2139 if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
2140 "inline"))
2141 { ... }
2142 */
2143
2144int
2145cpp_idcmp (token, len, string)
2146 const U_CHAR *token;
2147 size_t len;
2148 const char *string;
2149{
2150 size_t len2 = strlen (string);
2151 int r;
2152
2153 if ((r = memcmp (token, string, MIN (len, len2))))
2154 return r;
2155
2156 /* The longer of the two strings sorts after the shorter. */
2157 if (len == len2)
2158 return 0;
2159 else if (len < len2)
2160 return -1;
2161 else
2162 return 1;
2163}
2164
b8f41010 2165#ifdef NEW_LEXER
c5a04734 2166
d6d5f795
NB
2167/* Lexing algorithm.
2168
2169 The original lexer in cpplib was made up of two passes: a first pass
2170 that replaced trigraphs and deleted escaped newlines, and a second
2171 pass that tokenized the result of the first pass. Tokenisation was
2172 performed by peeking at the next character in the input stream. For
6777db6d 2173 example, if the input stream contained "!=", the handler for the !
d6d5f795 2174 character would peek at the next character, and if it were a '='
6777db6d
NB
2175 would skip over it, and return a "!=" token, otherwise it would
2176 return just the "!" token.
d6d5f795
NB
2177
2178 To implement a single-pass lexer, this peeking ahead is unworkable.
2179 An arbitrary number of escaped newlines, and trigraphs (in particular
6777db6d
NB
2180 ??/ which translates to the escape \), could separate the '!' and '='
2181 in the input stream, yet the next token is still a "!=".
d6d5f795
NB
2182
2183 Suppose instead that we lex by one logical line at a time, producing
6777db6d
NB
2184 a token list or stack for each logical line, and when seeing the '!'
2185 push a CPP_NOT token on the list. Then if the '!' is part of a
2186 longer token ("!=") we know we must see the remainder of the token by
2187 the time we reach the end of the logical line. Thus we can have the
2188 '=' handler look at the previous token (at the end of the list / top
2189 of the stack) and see if it is a "!" token, and if so, instead of
2190 pushing a "=" token revise the existing token to be a "!=" token.
d6d5f795
NB
2191
2192 This works in the presence of escaped newlines, because the '\' would
2193 have been pushed on the top of the stack as a CPP_BACKSLASH. The
2194 newline ('\n' or '\r') handler looks at the token at the top of the
2195 stack to see if it is a CPP_BACKSLASH, and if so discards both.
2196 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
2197 the '=' handler would never see any intervening escaped newlines.
2198
2199 To make trigraphs work in this context, as in precedence trigraphs
2200 are highest and converted before anything else, the '?' handler does
2201 lookahead to see if it is a trigraph, and if so skips the trigraph
2202 and pushes the token it represents onto the top of the stack. This
2203 also works in the particular case of a CPP_BACKSLASH trigraph.
2204
2205 To the preprocessor, whitespace is only significant to the point of
2206 knowing whether whitespace precedes a particular token. For example,
2207 the '=' handler needs to know whether there was whitespace between it
6777db6d 2208 and a "!" token on the top of the stack, to make the token conversion
d6d5f795
NB
2209 decision correctly. So each token has a PREV_WHITESPACE flag to
2210 indicate this - the standard permits consecutive whitespace to be
2211 regarded as a single space. The compiler front ends are not
2212 interested in whitespace at all; they just require a token stream.
2213 Another place where whitespace is significant to the preprocessor is
2214 a #define statement - if there is whitespace between the macro name
2215 and an initial "(" token the macro is "object-like", otherwise it is
2216 a function-like macro that takes arguments.
2217
2218 However, all is not rosy. Parsing of identifiers, numbers, comments
2219 and strings becomes trickier because of the possibility of raw
2220 trigraphs and escaped newlines in the input stream.
2221
2222 The trigraphs are three consecutive characters beginning with two
c2e25d51
NB
2223 question marks. A question mark is not valid as part of a number or
2224 identifier, so parsing of a number or identifier terminates normally
2225 upon reaching it, returning to the mainloop which handles the
2226 trigraph just like it would in any other position. Similarly for the
2227 backslash of a backslash-newline combination. So we just need the
2228 escaped-newline dropper in the mainloop to check if the token on the
2229 top of the stack after dropping the escaped newline is a number or
2230 identifier, and if so to continue the processing it as if nothing had
2231 happened.
d6d5f795
NB
2232
2233 For strings, we replace trigraphs whenever we reach a quote or
2234 newline, because there might be a backslash trigraph escaping them.
2235 We need to be careful that we start trigraph replacing from where we
2236 left off previously, because it is possible for a first scan to leave
2237 "fake" trigraphs that a second scan would pick up as real (e.g. the
c2e25d51 2238 sequence "????/\n=" would find a fake ??= trigraph after removing the
d6d5f795
NB
2239 escaped newline.)
2240
2241 For line comments, on reaching a newline we scan the previous
2242 character(s) to see if it escaped, and continue if it is. Block
2243 comments ignore everything and just focus on finding the comment
2244 termination mark. The only difficult thing, and it is surprisingly
2245 tricky, is checking if an asterisk precedes the final slash since
2246 they could be separated by escaped newlines. If the preprocessor is
2247 invoked with the output comments option, we don't bother removing
2248 escaped newlines and replacing trigraphs for output.
2249
2250 Finally, numbers can begin with a period, which is pushed initially
2251 as a CPP_DOT token in its own right. The digit handler checks if the
2252 previous token was a CPP_DOT not separated by whitespace, and if so
2253 pops it off the stack and pushes a period into the number's buffer
2254 before calling the number parser.
2255
2256*/
2257
b8f41010
NB
/* Spellings of the six digraph forms.  NOTE(review): the order
   presumably matches how digraph tokens are looked up elsewhere, and U
   is presumably a cast-to-unsigned-char-pointer macro defined outside
   this section - confirm before reordering.  */
2258static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
2259 U":>", U"<%", U"%>"};
/* Indexed by the third character of a candidate trigraph "??X".  Holds
   the replacement character, or zero when "??X" is not a trigraph.
   Populated by init_trigraph_map.  */
2260static unsigned char trigraph_map[256];
c5a04734
ZW
2261
2262static void
2263expand_comment_space (list)
2264 cpp_toklist *list;
2265{
2266 if (list->comments_cap == 0)
2267 {
2268 list->comments_cap = 10;
2269 list->comments = (cpp_token *)
2270 xmalloc (list->comments_cap * sizeof (cpp_token));
2271 }
2272 else
2273 {
2274 list->comments_cap *= 2;
2275 list->comments = (cpp_token *)
2276 xrealloc (list->comments, list->comments_cap);
2277 }
2278}
2279
2280void
2281cpp_free_token_list (list)
2282 cpp_toklist *list;
2283{
2284 if (list->comments)
2285 free (list->comments);
cfd5b8b8 2286 free (list->tokens - 1); /* Backup over dummy token. */
c5a04734
ZW
2287 free (list->namebuf);
2288 free (list);
2289}
2290
c5a04734
ZW
2291void
2292init_trigraph_map ()
2293{
2294 trigraph_map['='] = '#';
2295 trigraph_map['('] = '[';
2296 trigraph_map[')'] = ']';
2297 trigraph_map['/'] = '\\';
2298 trigraph_map['\''] = '^';
2299 trigraph_map['<'] = '{';
2300 trigraph_map['>'] = '}';
2301 trigraph_map['!'] = '|';
2302 trigraph_map['-'] = '~';
2303}
2304
2305/* Call when a trigraph is encountered. It warns if necessary, and
2306 returns true if the trigraph should be honoured. END is the third
2307 character of a trigraph in the input stream. */
2308static int
2309trigraph_ok (pfile, end)
2310 cpp_reader *pfile;
2311 const unsigned char *end;
2312{
2313 int accept = CPP_OPTION (pfile, trigraphs);
2314
2315 if (CPP_OPTION (pfile, warn_trigraphs))
2316 {
2317 unsigned int col = end - 1 - pfile->buffer->line_base;
2318 if (accept)
2319 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2320 "trigraph ??%c converted to %c",
2321 (int) *end, (int) trigraph_map[*end]);
2322 else
2323 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2324 "trigraph ??%c ignored", (int) *end);
2325 }
2326 return accept;
2327}
2328
2329/* Scan a string for trigraphs, warning or replacing them inline as
2330 appropriate. When parsing a string, we must call this routine
2331 before processing a newline character (if trigraphs are enabled),
2332 since the newline might be escaped by a preceding backslash
2333 trigraph sequence. Returns a pointer to the end of the name after
2334 replacement. */
2335
2336static unsigned char*
2337trigraph_replace (pfile, src, limit)
2338 cpp_reader *pfile;
2339 unsigned char *src;
2340 unsigned char* limit;
2341{
2342 unsigned char *dest;
2343
2344 /* Starting with src[1], find two consecutive '?'. The case of no
2345 trigraphs is streamlined. */
2346
2347 for (; src + 1 < limit; src += 2)
2348 {
2349 if (src[0] != '?')
2350 continue;
2351
2352 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
2353 if (src[-1] == '?')
2354 src--;
2355 else if (src + 2 == limit || src[1] != '?')
2356 continue;
2357
2358 /* Check if it really is a trigraph. */
2359 if (trigraph_map[src[2]] == 0)
2360 continue;
2361
2362 dest = src;
2363 goto trigraph_found;
2364 }
2365 return limit;
2366
2367 /* Now we have a trigraph, we need to scan the remaining buffer, and
2368 copy-shifting its contents left if replacement is enabled. */
2369 for (; src + 2 < limit; dest++, src++)
2370 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2371 {
2372 trigraph_found:
2373 src += 2;
2374 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2375 *dest = trigraph_map[*src];
2376 }
2377
2378 /* Copy remaining (at most 2) characters. */
2379 while (src < limit)
2380 *dest++ = *src++;
2381 return dest;
2382}
2383
2384/* If CUR is a backslash or the end of a trigraphed backslash, return
2385 a pointer to its beginning, otherwise NULL. We don't read beyond
2386 the buffer start, because there is the start of the comment in the
2387 buffer. */
2388static const unsigned char *
2389backslash_start (pfile, cur)
2390 cpp_reader *pfile;
2391 const unsigned char *cur;
2392{
2393 if (cur[0] == '\\')
2394 return cur;
2395 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2396 && trigraph_ok (pfile, cur))
2397 return cur - 2;
2398 return 0;
2399}
2400
2401/* Skip a C-style block comment. This is probably the trickiest
2402 handler. We find the end of the comment by seeing if an asterisk
2403 is before every '/' we encounter. The nasty complication is that a
2404 previous asterisk may be separated by one or more escaped newlines.
2405 Returns non-zero if comment terminated by EOF, zero otherwise. */
2406static int
b8f41010 2407skip_block_comment2 (pfile)
c5a04734
ZW
2408 cpp_reader *pfile;
2409{
2410 cpp_buffer *buffer = pfile->buffer;
2411 const unsigned char *char_after_star = 0;
2412 register const unsigned char *cur = buffer->cur;
2413 int seen_eof = 0;
2414
2415 /* Inner loop would think the comment has ended if the first comment
2416 character is a '/'. Avoid this and keep the inner loop clean by
2417 skipping such a character. */
2418 if (cur < buffer->rlimit && cur[0] == '/')
2419 cur++;
2420
2421 for (; cur < buffer->rlimit; )
2422 {
2423 unsigned char c = *cur++;
2424
2425 /* People like decorating comments with '*', so check for
2426 '/' instead for efficiency. */
2427 if (c == '/')
2428 {
2429 if (cur[-2] == '*' || cur - 1 == char_after_star)
2430 goto out;
2431
2432 /* Warn about potential nested comments, but not when
2433 the final character inside the comment is a '/'.
2434 Don't bother to get it right across escaped newlines. */
2435 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2436 && cur[0] == '*' && cur[1] != '/')
2437 {
2438 buffer->cur = cur;
2439 cpp_warning (pfile, "'/*' within comment");
2440 }
2441 }
2442 else if (IS_NEWLINE(c))
2443 {
2444 const unsigned char* bslash = backslash_start (pfile, cur - 2);
2445
2446 handle_newline (cur, buffer->rlimit, c);
2447 /* Work correctly if there is an asterisk before an
2448 arbirtrarily long sequence of escaped newlines. */
2449 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2450 char_after_star = cur;
2451 else
2452 char_after_star = 0;
2453 }
2454 }
2455 seen_eof = 1;
2456
2457 out:
2458 buffer->cur = cur;
2459 return seen_eof;
2460}
2461
2462/* Skip a C++ or Chill line comment. Handles escaped newlines.
2463 Returns non-zero if a multiline comment. */
2464static int
b8f41010 2465skip_line_comment2 (pfile)
c5a04734
ZW
2466 cpp_reader *pfile;
2467{
2468 cpp_buffer *buffer = pfile->buffer;
2469 register const unsigned char *cur = buffer->cur;
2470 int multiline = 0;
2471
2472 for (; cur < buffer->rlimit; )
2473 {
2474 unsigned char c = *cur++;
2475
2476 if (IS_NEWLINE (c))
2477 {
2478 /* Check for a (trigaph?) backslash escaping the newline. */
2479 if (!backslash_start (pfile, cur - 2))
2480 goto out;
2481 multiline = 1;
2482 handle_newline (cur, buffer->rlimit, c);
2483 }
2484 }
2485 cur++;
2486
2487 out:
2488 buffer->cur = cur - 1; /* Leave newline for caller. */
2489 return multiline;
2490}
2491
2492/* Skips whitespace, stopping at next non-whitespace character. */
2493static void
2494skip_whitespace (pfile, in_directive)
2495 cpp_reader *pfile;
2496 int in_directive;
2497{
2498 cpp_buffer *buffer = pfile->buffer;
2499 register const unsigned char *cur = buffer->cur;
2500 unsigned short null_count = 0;
2501
2502 for (; cur < buffer->rlimit; )
2503 {
2504 unsigned char c = *cur++;
2505
2506 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
2507 continue;
2508 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
2509 goto out;
2510 if (c == '\0')
2511 null_count++;
2512 /* Mut be '\f' or '\v' */
2513 else if (in_directive && CPP_PEDANTIC (pfile))
2514 cpp_pedwarn (pfile, "%s in preprocessing directive",
2515 c == '\f' ? "formfeed" : "vertical tab");
2516 }
2517 cur++;
2518
2519 out:
2520 buffer->cur = cur - 1;
2521 if (null_count)
2522 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2523 : "embedded null character ignored");
2524}
2525
2526/* Parse (append) an identifier. */
2527static void
2528parse_name (pfile, list, name)
2529 cpp_reader *pfile;
2530 cpp_toklist *list;
2531 cpp_name *name;
2532{
2533 const unsigned char *name_limit;
2534 unsigned char *namebuf;
2535 cpp_buffer *buffer = pfile->buffer;
2536 register const unsigned char *cur = buffer->cur;
2537
2538 expanded:
2539 name_limit = list->namebuf + list->name_cap;
2540 namebuf = list->namebuf + list->name_used;
2541
2542 for (; cur < buffer->rlimit && namebuf < name_limit; )
2543 {
2544 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2545
2546 if (! is_idchar(c))
2547 goto out;
2548 namebuf++;
2549 cur++;
2550 if (c == '$' && CPP_PEDANTIC (pfile))
2551 {
2552 buffer->cur = cur;
2553 cpp_pedwarn (pfile, "'$' character in identifier");
2554 }
2555 }
2556
2557 /* Run out of name space? */
2558 if (cur < buffer->rlimit)
2559 {
2560 list->name_used = namebuf - list->namebuf;
2561 auto_expand_name_space (list);
2562 goto expanded;
2563 }
2564
2565 out:
2566 buffer->cur = cur;
2567 name->len = namebuf - (list->namebuf + name->offset);
2568 list->name_used = namebuf - list->namebuf;
2569}
2570
2571/* Parse (append) a number. */
2572
2573#define VALID_SIGN(c, prevc) \
2574 (((c) == '+' || (c) == '-') && \
2575 ((prevc) == 'e' || (prevc) == 'E' \
2576 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2577
2578static void
2579parse_number (pfile, list, name)
2580 cpp_reader *pfile;
2581 cpp_toklist *list;
2582 cpp_name *name;
2583{
2584 const unsigned char *name_limit;
2585 unsigned char *namebuf;
2586 cpp_buffer *buffer = pfile->buffer;
2587 register const unsigned char *cur = buffer->cur;
2588
2589 expanded:
2590 name_limit = list->namebuf + list->name_cap;
2591 namebuf = list->namebuf + list->name_used;
2592
2593 for (; cur < buffer->rlimit && namebuf < name_limit; )
2594 {
2595 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2596
2597 /* Perhaps we should accept '$' here if we accept it for
2598 identifiers. We know namebuf[-1] is safe, because for c to
2599 be a sign we must have pushed at least one character. */
2600 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2601 goto out;
2602
2603 namebuf++;
2604 cur++;
2605 }
2606
2607 /* Run out of name space? */
2608 if (cur < buffer->rlimit)
2609 {
2610 list->name_used = namebuf - list->namebuf;
2611 auto_expand_name_space (list);
2612 goto expanded;
2613 }
2614
2615 out:
2616 buffer->cur = cur;
2617 name->len = namebuf - (list->namebuf + name->offset);
2618 list->name_used = namebuf - list->namebuf;
2619}
2620
2621/* Places a string terminated by an unescaped TERMINATOR into a
2622 cpp_name, which should be expandable and thus at the top of the
2623 list's stack. Handles embedded trigraphs, if necessary, and
2624 escaped newlines.
2625
2626 Can be used for character constants (terminator = '\''), string
2627 constants ('"'), angled headers ('>') and assertions (')'). */
2628
2629static void
b8f41010 2630parse_string2 (pfile, list, name, terminator)
c5a04734
ZW
2631 cpp_reader *pfile;
2632 cpp_toklist *list;
2633 cpp_name *name;
2634 unsigned int terminator;
2635{
2636 cpp_buffer *buffer = pfile->buffer;
2637 register const unsigned char *cur = buffer->cur;
2638 const unsigned char *name_limit;
2639 unsigned char *namebuf;
2640 unsigned int null_count = 0;
2641 int trigraphed_len = 0;
2642
2643 expanded:
2644 name_limit = list->namebuf + list->name_cap;
2645 namebuf = list->namebuf + list->name_used;
2646
2647 for (; cur < buffer->rlimit && namebuf < name_limit; )
2648 {
2649 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
2650
2651 if (c == '\0')
2652 null_count++;
2653 else if (c == terminator || IS_NEWLINE (c))
2654 {
2655 unsigned char* name_start = list->namebuf + name->offset;
2656
2657 /* Needed for trigraph_replace and multiline string warning. */
2658 buffer->cur = cur;
2659
2660 /* Scan for trigraphs before checking if backslash-escaped. */
2661 if (CPP_OPTION (pfile, trigraphs)
2662 || CPP_OPTION (pfile, warn_trigraphs))
2663 {
2664 namebuf = trigraph_replace (pfile, name_start + trigraphed_len,
2665 namebuf);
2666 trigraphed_len = namebuf - 2 - (name_start + trigraphed_len);
2667 if (trigraphed_len < 0)
2668 trigraphed_len = 0;
2669 }
2670
2671 namebuf--; /* Drop the newline / terminator from the name. */
2672 if (IS_NEWLINE (c))
2673 {
2674 /* Drop a backslash newline, and continue. */
2675 if (namebuf[-1] == '\\')
2676 {
2677 handle_newline (cur, buffer->rlimit, c);
2678 namebuf--;
2679 continue;
2680 }
2681
2682 cur--;
2683
2684 /* In Fortran and assembly language, silently terminate
2685 strings of either variety at end of line. This is a
2686 kludge around not knowing where comments are in these
2687 languages. */
2688 if (CPP_OPTION (pfile, lang_fortran)
2689 || CPP_OPTION (pfile, lang_asm))
2690 goto out;
2691
2692 /* Character constants, headers and asserts may not
2693 extend over multiple lines. In Standard C, neither
2694 may strings. We accept multiline strings as an
2695 extension, but not in directives. */
2696 if (terminator != '"' || IS_DIRECTIVE (list))
2697 goto unterminated;
2698
2699 cur++; /* Move forwards again. */
2700
2701 if (pfile->multiline_string_line == 0)
2702 {
2703 pfile->multiline_string_line = list->line;
2704 if (CPP_PEDANTIC (pfile))
2705 cpp_pedwarn (pfile, "multi-line string constant");
2706 }
2707
2708 *namebuf++ = '\n';
2709 handle_newline (cur, buffer->rlimit, c);
2710 }
2711 else
2712 {
2713 unsigned char *temp;
2714
2715 /* An odd number of consecutive backslashes represents
2716 an escaped terminator. */
2717 temp = namebuf - 1;
2718 while (temp >= name_start && *temp == '\\')
2719 temp--;
2720
2721 if ((namebuf - temp) & 1)
2722 goto out;
2723 namebuf++;
2724 }
2725 }
2726 }
2727
2728 /* Run out of name space? */
2729 if (cur < buffer->rlimit)
2730 {
2731 list->name_used = namebuf - list->namebuf;
2732 auto_expand_name_space (list);
2733 goto expanded;
2734 }
2735
2736 /* We may not have trigraph-replaced the input for this code path,
2737 but as the input is in error by being unterminated we don't
2738 bother. Prevent warnings about no newlines at EOF. */
2739 if (IS_NEWLINE(cur[-1]))
2740 cur--;
2741
2742 unterminated:
2743 cpp_error (pfile, "missing terminating %c character", (int) terminator);
2744
2745 if (terminator == '\"' && pfile->multiline_string_line != list->line
2746 && pfile->multiline_string_line != 0)
2747 {
2748 cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2749 "possible start of unterminated string literal");
2750 pfile->multiline_string_line = 0;
2751 }
2752
2753 out:
2754 buffer->cur = cur;
2755 name->len = namebuf - (list->namebuf + name->offset);
2756 list->name_used = namebuf - list->namebuf;
2757
2758 if (null_count > 0)
2759 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2760 : "null character preserved"));
2761}
2762
2763/* The character C helps us distinguish comment types: '*' = C style,
2764 '-' = Chill-style and '/' = C++ style. For code simplicity, the
2765 stored comment includes any C-style comment terminator. */
2766static void
b8f41010 2767save_comment (list, from, len, tok_no, type)
c5a04734
ZW
2768 cpp_toklist *list;
2769 const unsigned char *from;
2770 unsigned int len;
2771 unsigned int tok_no;
2772 unsigned int type;
2773{
2774 cpp_token *comment;
2775
2776 if (list->comments_used == list->comments_cap)
2777 expand_comment_space (list);
2778
2779 if (list->name_used + len > list->name_cap)
2780 expand_name_space (list, len);
2781
2782 comment = &list->comments[list->comments_used++];
2783 comment->type = type;
2784 comment->aux = tok_no;
2785 comment->val.name.len = len;
2786 comment->val.name.offset = list->name_used;
2787
2788 memcpy (list->namebuf + list->name_used, from, len);
2789 list->name_used += len;
2790}
2791
2792/*
2793 * The tokenizer's main loop. Returns a token list, representing a
2794 * logical line in the input file, terminated with a CPP_VSPACE
2795 * token. On EOF, a token list containing the single CPP_EOF token
2796 * is returned.
2797 *
2798 * Implementation relies almost entirely on lookback, rather than
2799 * looking forwards. This means that tokenization requires just
2800 * a single pass of the file, even in the presence of trigraphs and
2801 * escaped newlines, providing significant performance benefits.
2802 * Trigraph overhead is negligible if they are disabled, and low
2803 * even when enabled.
2804 */
2805
c5a04734
ZW
2806void
2807_cpp_lex_line (pfile, list)
2808 cpp_reader *pfile;
2809 cpp_toklist *list;
2810{
2811 cpp_token *cur_token, *token_limit;
2812 cpp_buffer *buffer = pfile->buffer;
2813 register const unsigned char *cur = buffer->cur;
2814 unsigned char flags = 0;
2815
2816 expanded:
2817 token_limit = list->tokens + list->tokens_cap;
2818 cur_token = list->tokens + list->tokens_used;
2819
2820 for (; cur < buffer->rlimit && cur_token < token_limit;)
2821 {
2822 unsigned char c = *cur++;
2823
2824 /* Optimize whitespace skipping, in particular the case of a
2825 single whitespace character, as every other token is probably
2826 whitespace. (' ' '\t' '\v' '\f' '\0'). */
2827 if (is_hspace ((unsigned int) c))
2828 {
2829 if (c == '\0' || (cur < buffer->rlimit && is_hspace (*cur)))
2830 {
2831 buffer->cur = cur - (c == '\0'); /* Get the null warning. */
2832 skip_whitespace (pfile, IS_DIRECTIVE (list));
2833 cur = buffer->cur;
2834 }
2835 flags = PREV_WHITESPACE;
2836 if (cur == buffer->rlimit)
2837 break;
2838 c = *cur++;
2839 }
2840
2841 /* Initialize current token. Its type is set in the switch. */
2842 cur_token->col = COLUMN (cur);
2843 cur_token->flags = flags;
2844 flags = 0;
2845
2846 switch (c)
2847 {
2848 case '0': case '1': case '2': case '3': case '4':
2849 case '5': case '6': case '7': case '8': case '9':
2850 /* Prepend an immediately previous CPP_DOT token. */
2851 if (PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ())
2852 {
2853 cur_token--;
2854 if (list->name_cap == list->name_used)
2855 auto_expand_name_space (list);
2856
2857 cur_token->val.name.len = 1;
2858 cur_token->val.name.offset = list->name_used;
2859 list->namebuf[list->name_used++] = '.';
2860 }
2861 else
2862 INIT_NAME (list, cur_token->val.name);
2863 cur--; /* Backup character. */
2864
2865 continue_number:
2866 buffer->cur = cur;
2867 parse_number (pfile, list, &cur_token->val.name);
2868 cur = buffer->cur;
2869
2870 PUSH_TOKEN (CPP_NUMBER); /* Number not yet interpreted. */
2871 break;
2872
2873 letter:
2874 case '_':
2875 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2876 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2877 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2878 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2879 case 'y': case 'z':
2880 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2881 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2882 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2883 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2884 case 'Y': case 'Z':
2885 INIT_NAME (list, cur_token->val.name);
2886 cur--; /* Backup character. */
2887 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
2888
2889 continue_name:
2890 buffer->cur = cur;
2891 parse_name (pfile, list, &cur_token->val.name);
2892 cur = buffer->cur;
2893
2894 /* Find handler for newly created / extended directive. */
2895 if (IS_DIRECTIVE (list) && cur_token == &list->tokens[1])
2896 _cpp_check_directive (list, cur_token);
2897 cur_token++;
2898 break;
2899
2900 case '\'':
2901 /* Fall through. */
2902 case '\"':
2903 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2904 /* Do we have a wide string? */
2905 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2906 && cur_token[-1].val.name.len == 1
1920de47 2907 && *(list->namebuf + cur_token[-1].val.name.offset) == 'L'
c5a04734
ZW
2908 && !CPP_TRADITIONAL (pfile))
2909 {
2910 /* No need for 'L' any more. */
2911 list->name_used--;
2912 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
2913 }
2914
2915 do_parse_string:
2916 /* Here c is one of ' " > or ). */
2917 INIT_NAME (list, cur_token->val.name);
2918 buffer->cur = cur;
b8f41010 2919 parse_string2 (pfile, list, &cur_token->val.name, c);
c5a04734
ZW
2920 cur = buffer->cur;
2921 cur_token++;
2922 break;
2923
2924 case '/':
2925 cur_token->type = CPP_DIV;
2926 if (IMMED_TOKEN ())
2927 {
2928 if (PREV_TOKEN_TYPE == CPP_DIV)
2929 {
2930 /* We silently allow C++ comments in system headers,
2931 irrespective of conformance mode, because lots of
2932 broken systems do that and trying to clean it up
2933 in fixincludes is a nightmare. */
2934 if (buffer->system_header_p)
2935 goto do_line_comment;
2936 else if (CPP_OPTION (pfile, cplusplus_comments))
2937 {
2938 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
2939 && ! buffer->warned_cplusplus_comments)
2940 {
2941 buffer->cur = cur;
2942 cpp_pedwarn (pfile,
2943 "C++ style comments are not allowed in ISO C89");
2944 cpp_pedwarn (pfile,
2945 "(this will be reported only once per input file)");
2946 buffer->warned_cplusplus_comments = 1;
2947 }
2948 do_line_comment:
2949 buffer->cur = cur;
2950 if (cur[-2] != c)
2951 cpp_warning (pfile,
2952 "comment start split across lines");
b8f41010 2953 if (skip_line_comment2 (pfile))
c5a04734
ZW
2954 cpp_error_with_line (pfile, list->line,
2955 cur_token[-1].col,
2956 "multi-line comment");
2957 if (!CPP_OPTION (pfile, discard_comments))
b8f41010 2958 save_comment (list, cur, buffer->cur - cur,
c5a04734
ZW
2959 cur_token - 1 - list->tokens, c == '/'
2960 ? CPP_CPP_COMMENT: CPP_CHILL_COMMENT);
2961 cur = buffer->cur;
2962
2963 /* Back-up to first '-' or '/'. */
2964 cur_token -= 2;
2965 if (!CPP_OPTION (pfile, traditional))
2966 flags = PREV_WHITESPACE;
2967 }
2968 }
2969 }
2970 cur_token++;
2971 break;
2972
2973 case '*':
2974 cur_token->type = CPP_MULT;
2975 if (IMMED_TOKEN ())
2976 {
2977 if (PREV_TOKEN_TYPE == CPP_DIV)
2978 {
2979 buffer->cur = cur;
2980 if (cur[-2] != '/')
2981 cpp_warning (pfile,
2982 "comment start '/*' split across lines");
b8f41010 2983 if (skip_block_comment2 (pfile))
c5a04734
ZW
2984 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
2985 "unterminated comment");
2986 else if (buffer->cur[-2] != '*')
2987 cpp_warning (pfile,
2988 "comment end '*/' split across lines");
2989 if (!CPP_OPTION (pfile, discard_comments))
b8f41010 2990 save_comment (list, cur, buffer->cur - cur,
c5a04734
ZW
2991 cur_token - 1 - list->tokens, CPP_C_COMMENT);
2992 cur = buffer->cur;
2993
2994 cur_token -= 2;
2995 if (!CPP_OPTION (pfile, traditional))
2996 flags = PREV_WHITESPACE;
2997 }
2998 else if (CPP_OPTION (pfile, cplusplus))
2999 {
3000 /* In C++, there are .* and ->* operators. */
3001 if (PREV_TOKEN_TYPE == CPP_DEREF)
3002 BACKUP_TOKEN (CPP_DEREF_STAR);
3003 else if (PREV_TOKEN_TYPE == CPP_DOT)
3004 BACKUP_TOKEN (CPP_DOT_STAR);
3005 }
3006 }
3007 cur_token++;
3008 break;
3009
3010 case '\n':
3011 case '\r':
3012 handle_newline (cur, buffer->rlimit, c);
3013 if (PREV_TOKEN_TYPE != CPP_BACKSLASH || !IMMED_TOKEN ())
3014 {
3015 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
3016 {
3017 buffer->cur = cur;
3018 cpp_warning (pfile,
3019 "backslash and newline separated by space");
3020 }
3021 PUSH_TOKEN (CPP_VSPACE);
3022 goto out;
3023 }
3024 /* Remove the escaped newline. Then continue to process
3025 any interrupted name or number. */
3026 cur_token--;
3027 if (IMMED_TOKEN ())
3028 {
3029 cur_token--;
3030 if (cur_token->type == CPP_NAME)
3031 goto continue_name;
3032 else if (cur_token->type == CPP_NUMBER)
3033 goto continue_number;
3034 cur_token++;
3035 }
3036 break;
3037
3038 case '-':
3039 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3040 {
3041 if (CPP_OPTION (pfile, chill))
3042 goto do_line_comment;
3043 REVISE_TOKEN (CPP_MINUS_MINUS);
3044 }
3045 else
3046 PUSH_TOKEN (CPP_MINUS);
3047 break;
3048
3049 /* The digraph flag checking ensures that ## and %:%:
3050 are interpreted as CPP_PASTE, but #%: and %:# are not. */
3051 make_hash:
3052 case '#':
3053 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3054 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3055 REVISE_TOKEN (CPP_PASTE);
3056 else
3057 PUSH_TOKEN (CPP_HASH);
3058 break;
3059
3060 case ':':
3061 cur_token->type = CPP_COLON;
3062 if (IMMED_TOKEN ())
3063 {
3064 if (PREV_TOKEN_TYPE == CPP_COLON
3065 && CPP_OPTION (pfile, cplusplus))
3066 BACKUP_TOKEN (CPP_SCOPE);
3067 /* Digraph: "<:" is a '[' */
3068 else if (PREV_TOKEN_TYPE == CPP_LESS)
3069 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3070 /* Digraph: "%:" is a '#' */
3071 else if (PREV_TOKEN_TYPE == CPP_MOD)
3072 {
3073 (--cur_token)->flags |= DIGRAPH;
3074 goto make_hash;
3075 }
3076 }
3077 cur_token++;
3078 break;
3079
3080 case '&':
3081 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3082 REVISE_TOKEN (CPP_AND_AND);
3083 else
3084 PUSH_TOKEN (CPP_AND);
3085 break;
3086
3087 make_or:
3088 case '|':
3089 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3090 REVISE_TOKEN (CPP_OR_OR);
3091 else
3092 PUSH_TOKEN (CPP_OR);
3093 break;
3094
3095 case '+':
3096 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3097 REVISE_TOKEN (CPP_PLUS_PLUS);
3098 else
3099 PUSH_TOKEN (CPP_PLUS);
3100 break;
3101
3102 case '=':
3103 /* This relies on equidistance of "?=" and "?" tokens. */
3104 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3105 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3106 else
3107 PUSH_TOKEN (CPP_EQ);
3108 break;
3109
3110 case '>':
3111 cur_token->type = CPP_GREATER;
3112 if (IMMED_TOKEN ())
3113 {
3114 if (PREV_TOKEN_TYPE == CPP_GREATER)
3115 BACKUP_TOKEN (CPP_RSHIFT);
3116 else if (PREV_TOKEN_TYPE == CPP_MINUS)
3117 BACKUP_TOKEN (CPP_DEREF);
3118 /* Digraph: ":>" is a ']' */
3119 else if (PREV_TOKEN_TYPE == CPP_COLON)
3120 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3121 /* Digraph: "%>" is a '}' */
3122 else if (PREV_TOKEN_TYPE == CPP_MOD)
3123 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
3124 }
3125 cur_token++;
3126 break;
3127
3128 case '<':
3129 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3130 {
3131 REVISE_TOKEN (CPP_LSHIFT);
3132 break;
3133 }
3134 /* Is this the beginning of a header name? */
3135 if (list->dir_flags & SYNTAX_INCLUDE)
3136 {
3137 c = '>'; /* Terminator. */
3138 cur_token->type = CPP_HEADER_NAME;
3139 goto do_parse_string;
3140 }
3141 PUSH_TOKEN (CPP_LESS);
3142 break;
3143
3144 case '%':
3145 /* Digraph: "<%" is a '{' */
3146 cur_token->type = CPP_MOD;
3147 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3148 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
3149 cur_token++;
3150 break;
3151
c5a04734
ZW
3152 case '(':
3153 /* Is this the beginning of an assertion string? */
3154 if (list->dir_flags & SYNTAX_ASSERT)
3155 {
3156 c = ')'; /* Terminator. */
3157 cur_token->type = CPP_ASSERTION;
3158 goto do_parse_string;
3159 }
3160 PUSH_TOKEN (CPP_OPEN_PAREN);
3161 break;
3162
c5a04734
ZW
3163 case '?':
3164 if (cur + 1 < buffer->rlimit && *cur == '?'
3165 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3166 {
3167 /* Handle trigraph. */
3168 cur++;
3169 switch (*cur++)
3170 {
3171 case '(': goto make_open_square;
3172 case ')': goto make_close_square;
3173 case '<': goto make_open_brace;
3174 case '>': goto make_close_brace;
3175 case '=': goto make_hash;
3176 case '!': goto make_or;
3177 case '-': goto make_complement;
3178 case '/': goto make_backslash;
3179 case '\'': goto make_xor;
3180 }
3181 }
3182 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3183 {
3184 /* GNU C++ defines <? and >? operators. */
3185 if (PREV_TOKEN_TYPE == CPP_LESS)
3186 {
3187 REVISE_TOKEN (CPP_MIN);
3188 break;
3189 }
3190 else if (PREV_TOKEN_TYPE == CPP_GREATER)
3191 {
3192 REVISE_TOKEN (CPP_MAX);
3193 break;
3194 }
3195 }
3196 PUSH_TOKEN (CPP_QUERY);
3197 break;
3198
3199 case '.':
3200 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3201 && IMMED_TOKEN ()
3202 && !(cur_token[-1].flags & PREV_WHITESPACE))
3203 {
3204 cur_token -= 2;
3205 PUSH_TOKEN (CPP_ELLIPSIS);
3206 }
3207 else
3208 PUSH_TOKEN (CPP_DOT);
3209 break;
3210
cfd5b8b8
NB
3211 make_complement:
3212 case '~': PUSH_TOKEN (CPP_COMPL); break;
c5a04734
ZW
3213 make_xor:
3214 case '^': PUSH_TOKEN (CPP_XOR); break;
3215 make_open_brace:
3216 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3217 make_close_brace:
3218 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3219 make_open_square:
3220 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3221 make_close_square:
3222 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3223 make_backslash:
3224 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3225 case '!': PUSH_TOKEN (CPP_NOT); break;
3226 case ',': PUSH_TOKEN (CPP_COMMA); break;
3227 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
cfd5b8b8 3228 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
c5a04734
ZW
3229
3230 case '$':
3231 if (CPP_OPTION (pfile, dollars_in_ident))
3232 goto letter;
3233 /* Fall through */
3234 default:
3235 cur_token->aux = c;
3236 PUSH_TOKEN (CPP_OTHER);
3237 break;
3238 }
3239 }
3240
3241 /* Run out of token space? */
3242 if (cur_token == token_limit)
3243 {
3244 list->tokens_used = cur_token - list->tokens;
3245 expand_token_space (list);
3246 goto expanded;
3247 }
3248
3249 cur_token->type = CPP_EOF;
3250 cur_token->flags = flags;
3251
3252 if (cur_token != &list->tokens[0])
3253 {
3254 /* Next call back will get just a CPP_EOF. */
3255 buffer->cur = cur;
3256 cpp_warning (pfile, "no newline at end of file");
3257 PUSH_TOKEN (CPP_VSPACE);
3258 }
3259
3260 out:
3261 buffer->cur = cur;
3262
3263 list->tokens_used = cur_token - list->tokens;
3264
3265 /* FIXME: take this check out and put it in the caller.
3266 list->directive == 0 indicates an unknown directive (but null
3267 directive is OK). This is the first time we can be sure the
3268 directive is invalid, and thus warn about it, because it might
3269 have been split by escaped newlines. Also, don't complain about
3270 invalid directives in assembly source, we don't know where the
3271 comments are, and # may introduce assembler pseudo-ops. */
3272
3273 if (IS_DIRECTIVE (list) && list->dir_handler == 0
3274 && list->tokens[1].type != CPP_VSPACE
3275 && !CPP_OPTION (pfile, lang_asm))
3276 cpp_error_with_line (pfile, list->line, list->tokens[1].col,
3277 "invalid preprocessing directive");
3278}
3279
3280/* Token spelling functions. Used for output of a preprocessed file,
3281 stringizing and token pasting. They all assume sufficient buffer
3282 is allocated, and return exactly how much they used. */
3283
c5a04734
ZW
3284/* Needs buffer of 3 + len. */
3285unsigned int
3286spell_string (buffer, list, token)
3287 unsigned char *buffer;
3288 cpp_toklist *list;
3289 cpp_token *token;
3290{
cfd5b8b8 3291 unsigned char c, *orig_buff = buffer;
c5a04734
ZW
3292 size_t len;
3293
cfd5b8b8 3294 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
c5a04734 3295 *buffer++ = 'L';
cfd5b8b8
NB
3296 c = token->type == CPP_STRING || token->type == CPP_WSTRING ? '"': '\'';
3297 *buffer++ = c;
c5a04734
ZW
3298
3299 len = token->val.name.len;
1920de47 3300 memcpy (buffer, list->namebuf + token->val.name.offset, len);
c5a04734 3301 buffer += len;
cfd5b8b8 3302 *buffer++ = c;
c5a04734
ZW
3303 return buffer - orig_buff;
3304}
3305
3306/* Needs buffer of len + 2. */
3307unsigned int
3308spell_comment (buffer, list, token)
3309 unsigned char *buffer;
3310 cpp_toklist *list;
3311 cpp_token *token;
3312{
3313 size_t len;
3314
3315 if (token->type == CPP_C_COMMENT)
3316 {
3317 *buffer++ = '/';
3318 *buffer++ = '*';
3319 }
3320 else if (token->type == CPP_CPP_COMMENT)
3321 {
3322 *buffer++ = '/';
3323 *buffer++ = '/';
3324 }
3325 else
3326 {
3327 *buffer++ = '-';
3328 *buffer++ = '-';
3329 }
3330
3331 len = token->val.name.len;
1920de47 3332 memcpy (buffer, list->namebuf + token->val.name.offset, len);
c5a04734
ZW
3333
3334 return len + 2;
3335}
3336
3337/* Needs buffer of len. */
3338unsigned int
3339spell_name (buffer, list, token)
3340 unsigned char *buffer;
3341 cpp_toklist *list;
3342 cpp_token *token;
3343{
3344 size_t len;
3345
3346 len = token->val.name.len;
1920de47 3347 memcpy (buffer, list->namebuf + token->val.name.offset, len);
c5a04734
ZW
3348 buffer += len;
3349
3350 return len;
3351}
3352
c5a04734
ZW
3353void
3354_cpp_lex_file (pfile)
3355 cpp_reader* pfile;
3356{
3357 int recycle;
3358 cpp_toklist* list;
3359
3360 init_trigraph_map ();
3361 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3362
3363 for (recycle = 0; ;)
3364 {
3365 init_token_list (pfile, list, recycle);
3366 recycle = 1;
3367
3368 _cpp_lex_line (pfile, list);
3369 if (list->tokens[0].type == CPP_EOF)
3370 break;
3371
3372 if (list->dir_handler)
3373 {
3374 if (list->dir_handler (pfile))
3375 {
3376 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3377 recycle = 0;
3378 }
3379 }
3380 else
3381 _cpp_output_list (pfile, list);
3382 }
3383}
3384
b8f41010 3385/* Temporary function for illustrative purposes. */
c5a04734
ZW
3386static void
3387_cpp_output_list (pfile, list)
3388 cpp_reader *pfile;
3389 cpp_toklist *list;
3390{
3391 unsigned int comment_no = 0;
3392 cpp_token *token, *comment_token = 0;
3393
3394 if (list->comments_used > 0)
3395 comment_token = list->tokens + list->comments[0].aux;
3396
3397 CPP_RESERVE (pfile, 2); /* Always have room for " \n". */
3398 for (token = &list->tokens[0];; token++)
3399 {
3400 if (token->flags & PREV_WHITESPACE)
3401 {
3402 /* Output comments if -C. Otherwise a space will do. */
3403 if (token == comment_token)
3404 {
3405 cpp_token *comment = &list->comments[comment_no];
3406 do
3407 {
cfd5b8b8 3408 CPP_RESERVE (pfile, 2 + TOKEN_LEN (comment));
c5a04734
ZW
3409 pfile->limit += spell_comment (pfile->limit, list, comment);
3410 comment_no++, comment++;
3411 if (comment_no == list->comments_used)
3412 break;
3413 comment_token = comment->aux + list->tokens;
3414 }
3415 while (comment_token == token);
3416 }
3417 else
3418 CPP_PUTC_Q (pfile, ' ');
3419 }
3420
cfd5b8b8 3421 CPP_RESERVE (pfile, 2 + TOKEN_LEN (token));
c5a04734
ZW
3422 switch (token_spellings[token->type].type)
3423 {
3424 case SPELL_TEXT:
3425 {
3426 const unsigned char *spelling;
3427 unsigned char c;
3428
c5a04734 3429 if (token->flags & DIGRAPH)
cfd5b8b8 3430 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
c5a04734
ZW
3431 else
3432 spelling = token_spellings[token->type].speller;
3433
3434 while ((c = *spelling++) != '\0')
3435 CPP_PUTC_Q (pfile, c);
3436 }
3437 break;
3438
3439 case SPELL_HANDLER:
3440 {
3441 speller s;
3442
3443 s = (speller) token_spellings[token->type].speller;
c5a04734
ZW
3444 pfile->limit += s (pfile->limit, list, token);
3445 }
3446 break;
3447
cfd5b8b8
NB
3448 case SPELL_CHAR:
3449 *pfile->limit++ = token->aux;
3450 break;
3451
c5a04734
ZW
3452 case SPELL_EOL:
3453 CPP_PUTC_Q (pfile, '\n');
3454 return;
3455
3456 case SPELL_NONE:
3457 cpp_error (pfile, "Unwriteable token");
3458 break;
3459 }
3460 }
3461}
3462
3463#endif
This page took 0.453381 seconds and 5 git commands to generate.