]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
cppfiles.c (redundant_include_p): Provide length of token to cpp_defined.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23#include "config.h"
24#include "system.h"
25#include "intl.h"
e38992e8 26#include "hashtab.h"
45b966db
ZW
27#include "cpplib.h"
28#include "cpphash.h"
29
ff2b53ef
ZW
/* Primitive operations on the read pointer of a buffer.  BUFFER must
   have `cur' and `rlimit' members.

   GETBUF consumes and yields the next character, or yields EOF
   (without moving) once `cur' has reached `rlimit'.
   PEEKBUF yields the character N positions past `cur' without
   consuming anything, or EOF when N or fewer characters remain.
   FORWARDBUF moves `cur' by N, which may be negative; it performs no
   bounds checking.  */
#define GETBUF(BUFFER) \
  ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
#define PEEKBUF(BUFFER, N) \
  ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
#define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))

/* The same operations applied to CPP_BUFFER (pfile); a variable named
   `pfile' must be in scope wherever these are used.  */
#define GETC() GETBUF (CPP_BUFFER (pfile))
#define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
#define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
#define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
45b966db
ZW
40
41static void skip_block_comment PARAMS ((cpp_reader *));
42static void skip_line_comment PARAMS ((cpp_reader *));
43static int maybe_macroexpand PARAMS ((cpp_reader *, long));
44static int skip_comment PARAMS ((cpp_reader *, int));
45static int copy_comment PARAMS ((cpp_reader *, int));
46static void skip_string PARAMS ((cpp_reader *, int));
47static void parse_string PARAMS ((cpp_reader *, int));
48static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
49static int null_cleanup PARAMS ((cpp_buffer *, cpp_reader *));
64aaf407 50static void null_warning PARAMS ((cpp_reader *, unsigned int));
45b966db 51
f2d5f0cc
ZW
52static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
53 size_t, FILE *));
1368ee70
ZW
54static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
55 unsigned int));
56static void bump_column PARAMS ((cpp_printer *, unsigned int,
57 unsigned int));
c5a04734 58static void expand_name_space PARAMS ((cpp_toklist *, unsigned int));
1368ee70
ZW
59static void expand_token_space PARAMS ((cpp_toklist *));
60static void init_token_list PARAMS ((cpp_reader *, cpp_toklist *, int));
9e62c811
ZW
61static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
62 unsigned int));
f2d5f0cc 63
c5a04734
ZW
64#define auto_expand_name_space(list) \
65 expand_name_space ((list), (list)->name_cap / 2)
66
45b966db
ZW
67/* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
68
69void
70_cpp_grow_token_buffer (pfile, n)
71 cpp_reader *pfile;
72 long n;
73{
74 long old_written = CPP_WRITTEN (pfile);
75 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
76 pfile->token_buffer = (U_CHAR *)
77 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
78 CPP_SET_WRITTEN (pfile, old_written);
79}
80
81static int
82null_cleanup (pbuf, pfile)
83 cpp_buffer *pbuf ATTRIBUTE_UNUSED;
84 cpp_reader *pfile ATTRIBUTE_UNUSED;
85{
86 return 0;
87}
88
89/* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
90 If BUFFER != NULL, then use the LENGTH characters in BUFFER
91 as the new input buffer.
92 Return the new buffer, or NULL on failure. */
93
94cpp_buffer *
95cpp_push_buffer (pfile, buffer, length)
96 cpp_reader *pfile;
97 const U_CHAR *buffer;
98 long length;
99{
100 cpp_buffer *buf = CPP_BUFFER (pfile);
101 cpp_buffer *new;
102 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
103 {
104 cpp_fatal (pfile, "macro or `#include' recursion too deep");
105 return NULL;
106 }
107
108 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
109
110 new->if_stack = pfile->if_stack;
111 new->cleanup = null_cleanup;
112 new->buf = new->cur = buffer;
ff2b53ef 113 new->rlimit = buffer + length;
45b966db 114 new->prev = buf;
ff2b53ef 115 new->mark = NULL;
45b966db
ZW
116 new->line_base = NULL;
117
118 CPP_BUFFER (pfile) = new;
119 return new;
120}
121
122cpp_buffer *
123cpp_pop_buffer (pfile)
124 cpp_reader *pfile;
125{
126 cpp_buffer *buf = CPP_BUFFER (pfile);
127 if (ACTIVE_MARK_P (pfile))
128 cpp_ice (pfile, "mark active in cpp_pop_buffer");
129 (*buf->cleanup) (buf, pfile);
130 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
131 free (buf);
132 pfile->buffer_stack_depth--;
133 return CPP_BUFFER (pfile);
134}
135
f2d5f0cc
ZW
136/* Deal with the annoying semantics of fwrite. */
137static void
138safe_fwrite (pfile, buf, len, fp)
139 cpp_reader *pfile;
140 const U_CHAR *buf;
141 size_t len;
142 FILE *fp;
143{
144 size_t count;
45b966db 145
f2d5f0cc
ZW
146 while (len)
147 {
148 count = fwrite (buf, 1, len, fp);
149 if (count == 0)
150 goto error;
151 len -= count;
152 buf += count;
153 }
154 return;
155
156 error:
157 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
158}
159
160/* Notify the compiler proper that the current line number has jumped,
161 or the current file name has changed. */
162
163static void
1368ee70 164output_line_command (pfile, print, line)
45b966db 165 cpp_reader *pfile;
f2d5f0cc 166 cpp_printer *print;
1368ee70 167 unsigned int line;
45b966db 168{
1368ee70 169 cpp_buffer *ip = cpp_file_buffer (pfile);
f2d5f0cc
ZW
170 enum { same = 0, enter, leave, rname } change;
171 static const char * const codes[] = { "", " 1", " 2", "" };
172
173 if (CPP_OPTION (pfile, no_line_commands))
174 return;
175
f2d5f0cc
ZW
176 /* Determine whether the current filename has changed, and if so,
177 how. 'nominal_fname' values are unique, so they can be compared
178 by comparing pointers. */
179 if (ip->nominal_fname == print->last_fname)
180 change = same;
181 else
45b966db 182 {
f2d5f0cc
ZW
183 if (pfile->buffer_stack_depth == print->last_bsd)
184 change = rname;
185 else
45b966db 186 {
f2d5f0cc
ZW
187 if (pfile->buffer_stack_depth > print->last_bsd)
188 change = enter;
189 else
190 change = leave;
191 print->last_bsd = pfile->buffer_stack_depth;
45b966db 192 }
f2d5f0cc 193 print->last_fname = ip->nominal_fname;
45b966db 194 }
f2d5f0cc
ZW
195 /* If the current file has not changed, we can output a few newlines
196 instead if we want to increase the line number by a small amount.
197 We cannot do this if print->lineno is zero, because that means we
198 haven't output any line commands yet. (The very first line
199 command output is a `same_file' command.) */
200 if (change == same && print->lineno != 0
201 && line >= print->lineno && line < print->lineno + 8)
45b966db 202 {
f2d5f0cc 203 while (line > print->lineno)
45b966db 204 {
f2d5f0cc
ZW
205 putc ('\n', print->outf);
206 print->lineno++;
45b966db 207 }
f2d5f0cc 208 return;
45b966db 209 }
f2d5f0cc
ZW
210
211#ifndef NO_IMPLICIT_EXTERN_C
212 if (CPP_OPTION (pfile, cplusplus))
213 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
214 codes[change],
215 ip->system_header_p ? " 3" : "",
216 (ip->system_header_p == 2) ? " 4" : "");
217 else
218#endif
219 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
220 codes[change],
221 ip->system_header_p ? " 3" : "");
222 print->lineno = line;
223}
224
225/* Write the contents of the token_buffer to the output stream, and
226 clear the token_buffer. Also handles generating line commands and
227 keeping track of file transitions. */
228
229void
230cpp_output_tokens (pfile, print)
231 cpp_reader *pfile;
232 cpp_printer *print;
233{
1368ee70
ZW
234 cpp_buffer *ip;
235
f6fab919
ZW
236 if (CPP_WRITTEN (pfile) - print->written)
237 {
238 if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
239 print->lineno++;
240 safe_fwrite (pfile, pfile->token_buffer,
241 CPP_WRITTEN (pfile) - print->written, print->outf);
242 }
1368ee70
ZW
243
244 ip = cpp_file_buffer (pfile);
245 if (ip)
246 output_line_command (pfile, print, CPP_BUF_LINE (ip));
247
f2d5f0cc 248 CPP_SET_WRITTEN (pfile, print->written);
45b966db
ZW
249}
250
1368ee70
ZW
251/* Helper for cpp_output_list - increases the column number to match
252 what we expect it to be. */
253
254static void
255bump_column (print, from, to)
256 cpp_printer *print;
257 unsigned int from, to;
258{
259 unsigned int tabs, spcs;
260 unsigned int delta = to - from;
261
262 /* Only if FROM is 0, advance by tabs. */
263 if (from == 0)
264 tabs = delta / 8, spcs = delta % 8;
265 else
266 tabs = 0, spcs = delta;
267
268 while (tabs--) putc ('\t', print->outf);
269 while (spcs--) putc (' ', print->outf);
270}
271
272/* Write out the list L onto pfile->token_buffer. This function is
273 incomplete:
274
275 1) pfile->token_buffer is not going to continue to exist.
276 2) At the moment, tokens don't carry the information described
277 in cpplib.h; they are all strings.
278 3) The list has to be a complete line, and has to be written starting
279 at the beginning of a line. */
280
281void
282cpp_output_list (pfile, print, list)
283 cpp_reader *pfile;
284 cpp_printer *print;
285 const cpp_toklist *list;
286{
287 unsigned int i;
288 unsigned int curcol = 1;
289
290 /* XXX Probably does not do what is intended. */
291 if (print->lineno != list->line)
292 output_line_command (pfile, print, list->line);
293
294 for (i = 0; i < list->tokens_used; i++)
295 {
296 if (list->tokens[i].type == CPP_VSPACE)
297 {
298 output_line_command (pfile, print, list->tokens[i].aux);
299 continue;
300 }
301
302 if (curcol < list->tokens[i].col)
303 {
304 /* Insert space to bring the column to what it should be. */
305 bump_column (print, curcol - 1, list->tokens[i].col);
306 curcol = list->tokens[i].col;
307 }
308 /* XXX We may have to insert space to prevent an accidental
309 token paste. */
310 safe_fwrite (pfile, list->namebuf + list->tokens[i].val.name.offset,
311 list->tokens[i].val.name.len, print->outf);
312 curcol += list->tokens[i].val.name.len;
313 }
314}
315
f2d5f0cc
ZW
316/* Scan a string (which may have escape marks), perform macro expansion,
317 and write the result to the token_buffer. */
45b966db
ZW
318
319void
f2d5f0cc 320_cpp_expand_to_buffer (pfile, buf, length)
45b966db
ZW
321 cpp_reader *pfile;
322 const U_CHAR *buf;
323 int length;
324{
f2d5f0cc
ZW
325 cpp_buffer *ip;
326 enum cpp_ttype token;
f6fab919 327 U_CHAR *buf1;
45b966db
ZW
328
329 if (length < 0)
330 {
331 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
332 return;
333 }
334
f6fab919
ZW
335 /* Copy the buffer, because it might be in an unsafe place - for
336 example, a sequence on the token_buffer, where the pointers will
337 be invalidated if we enlarge the token_buffer. */
338 buf1 = alloca (length);
339 memcpy (buf1, buf, length);
340
45b966db 341 /* Set up the input on the input stack. */
f6fab919 342 ip = cpp_push_buffer (pfile, buf1, length);
45b966db
ZW
343 if (ip == NULL)
344 return;
345 ip->has_escapes = 1;
346
347 /* Scan the input, create the output. */
f2d5f0cc
ZW
348 for (;;)
349 {
350 token = cpp_get_token (pfile);
351 if (token == CPP_EOF)
352 break;
353 if (token == CPP_POP && CPP_BUFFER (pfile) == ip)
354 {
355 cpp_pop_buffer (pfile);
356 break;
357 }
358 }
45b966db
ZW
359}
360
f2d5f0cc
ZW
361/* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.
362 Then pop the buffer. */
363
364void
365cpp_scan_buffer_nooutput (pfile)
366 cpp_reader *pfile;
367{
368 cpp_buffer *buffer = CPP_BUFFER (pfile);
369 enum cpp_ttype token;
370 unsigned int old_written = CPP_WRITTEN (pfile);
371 /* In no-output mode, we can ignore everything but directives. */
372 for (;;)
373 {
374 if (! pfile->only_seen_white)
375 _cpp_skip_rest_of_line (pfile);
376 token = cpp_get_token (pfile);
377 if (token == CPP_EOF)
378 break;
379 if (token == CPP_POP && CPP_BUFFER (pfile) == buffer)
380 {
381 cpp_pop_buffer (pfile);
382 break;
383 }
384 }
385 CPP_SET_WRITTEN (pfile, old_written);
386}
387
388/* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.
389 Then pop the buffer. */
390
391void
392cpp_scan_buffer (pfile, print)
393 cpp_reader *pfile;
394 cpp_printer *print;
395{
396 cpp_buffer *buffer = CPP_BUFFER (pfile);
397 enum cpp_ttype token;
398
399 for (;;)
400 {
401 token = cpp_get_token (pfile);
402 if ((token == CPP_POP && !CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
403 || token == CPP_EOF || token == CPP_VSPACE
404 /* XXX Temporary kluge - force flush after #include only */
405 || (token == CPP_DIRECTIVE
406 && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
407 {
408 cpp_output_tokens (pfile, print);
409 if (token == CPP_EOF)
410 return;
411 if (token == CPP_POP && CPP_BUFFER (pfile) == buffer)
412 {
413 cpp_pop_buffer (pfile);
414 return;
415 }
416 }
417 }
418}
419
45b966db
ZW
420/* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
421
422cpp_buffer *
423cpp_file_buffer (pfile)
424 cpp_reader *pfile;
425{
426 cpp_buffer *ip;
427
428 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
429 if (ip->ihash != NULL)
430 return ip;
431 return NULL;
432}
433
1368ee70
ZW
434/* Token-buffer helper functions. */
435
436/* Expand a token list's string space. */
437static void
c5a04734 438expand_name_space (list, len)
1368ee70 439 cpp_toklist *list;
c5a04734
ZW
440 unsigned int len;
441{
442 list->name_cap += len;
443 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
1368ee70
ZW
444}
445
446/* Expand the number of tokens in a list. */
447static void
448expand_token_space (list)
449 cpp_toklist *list;
450{
451 list->tokens_cap *= 2;
452 list->tokens = (cpp_token *)
c5a04734
ZW
453 xrealloc (list->tokens - 1, (list->tokens_cap + 1) * sizeof (cpp_token));
454 list->tokens++; /* Skip the dummy. */
1368ee70
ZW
455}
456
c5a04734
ZW
457/* Initialize a token list. We allocate an extra token in front of
458 the token list, as this allows us to always peek at the previous
459 token without worrying about underflowing the list. */
1368ee70
ZW
460static void
461init_token_list (pfile, list, recycle)
462 cpp_reader *pfile;
463 cpp_toklist *list;
464 int recycle;
465{
c5a04734
ZW
466 /* Recycling a used list saves 3 free-malloc pairs. */
467 if (!recycle)
1368ee70 468 {
c5a04734
ZW
469 /* Initialize token space. Put a dummy token before the start
470 that will fail matches. */
471 list->tokens_cap = 256; /* 4K's worth. */
1368ee70 472 list->tokens = (cpp_token *)
c5a04734
ZW
473 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
474 list->tokens[0].type = CPP_EOF;
475 list->tokens++;
1368ee70 476
c5a04734 477 /* Initialize name space. */
1368ee70 478 list->name_cap = 1024;
1368ee70 479 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
c5a04734
ZW
480
481 /* Only create a comment space on demand. */
482 list->comments_cap = 0;
483 list->comments = 0;
1368ee70
ZW
484 }
485
c5a04734
ZW
486 list->tokens_used = 0;
487 list->name_used = 0;
488 list->comments_used = 0;
9e62c811
ZW
489 if (pfile->buffer)
490 list->line = pfile->buffer->lineno;
1368ee70
ZW
491 list->dir_handler = 0;
492 list->dir_flags = 0;
493}
494
495/* Scan an entire line and create a token list for it. Does not
496 macro-expand or execute directives. */
497
498void
499_cpp_scan_line (pfile, list)
500 cpp_reader *pfile;
501 cpp_toklist *list;
502{
503 int i, col;
504 long written, len;
505 enum cpp_ttype type;
9e62c811 506 int space_before;
1368ee70
ZW
507
508 init_token_list (pfile, list, 1);
509
510 written = CPP_WRITTEN (pfile);
511 i = 0;
9e62c811 512 space_before = 0;
1368ee70
ZW
513 for (;;)
514 {
515 col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
516 type = _cpp_lex_token (pfile);
517 len = CPP_WRITTEN (pfile) - written;
518 CPP_SET_WRITTEN (pfile, written);
519 if (type == CPP_HSPACE)
9e62c811
ZW
520 {
521 if (CPP_PEDANTIC (pfile))
522 pedantic_whitespace (pfile, pfile->token_buffer + written, len);
523 space_before = 1;
524 continue;
525 }
0f89df67
ZW
526 else if (type == CPP_COMMENT)
527 /* Only happens when processing -traditional macro definitions.
528 Do not give this a token entry, but do not change space_before
529 either. */
530 continue;
1368ee70
ZW
531
532 if (list->tokens_used >= list->tokens_cap)
533 expand_token_space (list);
534 if (list->name_used + len >= list->name_cap)
bb1ec1d7 535 expand_name_space (list, list->name_used + len + 1 - list->name_cap);
1368ee70 536
9e62c811
ZW
537 if (type == CPP_MACRO)
538 type = CPP_NAME;
539
1368ee70
ZW
540 list->tokens_used++;
541 list->tokens[i].type = type;
542 list->tokens[i].col = col;
c5a04734 543 list->tokens[i].flags = space_before ? PREV_WHITESPACE : 0;
9e62c811 544
1368ee70
ZW
545 if (type == CPP_VSPACE)
546 break;
547
548 list->tokens[i].val.name.len = len;
549 list->tokens[i].val.name.offset = list->name_used;
550 memcpy (list->namebuf + list->name_used, CPP_PWRITTEN (pfile), len);
551 list->name_used += len;
552 i++;
9e62c811 553 space_before = 0;
1368ee70
ZW
554 }
555 list->tokens[i].aux = CPP_BUFFER (pfile)->lineno + 1;
9e62c811
ZW
556
557 /* XXX Temporary kluge: put back the newline. */
558 FORWARD(-1);
1368ee70
ZW
559}
560
561
45b966db
ZW
562/* Skip a C-style block comment. We know it's a comment, and point is
563 at the second character of the starter. */
564static void
565skip_block_comment (pfile)
566 cpp_reader *pfile;
567{
3a2b2c7a 568 unsigned int line, col;
61474454 569 const U_CHAR *limit, *cur;
45b966db
ZW
570
571 FORWARD(1);
3a2b2c7a
ZW
572 line = CPP_BUF_LINE (CPP_BUFFER (pfile));
573 col = CPP_BUF_COL (CPP_BUFFER (pfile));
61474454
NB
574 limit = CPP_BUFFER (pfile)->rlimit;
575 cur = CPP_BUFFER (pfile)->cur;
576
577 while (cur < limit)
45b966db 578 {
61474454
NB
579 char c = *cur++;
580 if (c == '\n' || c == '\r')
45b966db
ZW
581 {
582 /* \r cannot be a macro escape marker here. */
583 if (!ACTIVE_MARK_P (pfile))
61474454
NB
584 CPP_BUMP_LINE_CUR (pfile, cur);
585 }
586 else if (c == '*')
587 {
588 /* Check for teminator. */
589 if (cur < limit && *cur == '/')
590 goto out;
591
592 /* Warn about comment starter embedded in comment. */
593 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
594 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
595 cur - CPP_BUFFER (pfile)->line_base,
596 "'/*' within comment");
45b966db 597 }
45b966db 598 }
61474454
NB
599
600 cpp_error_with_line (pfile, line, col, "unterminated comment");
601 cur--;
602 out:
603 CPP_BUFFER (pfile)->cur = cur + 1;
45b966db
ZW
604}
605
606/* Skip a C++/Chill line comment. We know it's a comment, and point
607 is at the second character of the initiator. */
608static void
609skip_line_comment (pfile)
610 cpp_reader *pfile;
611{
612 FORWARD(1);
613 for (;;)
614 {
615 int c = GETC ();
616
617 /* We don't have to worry about EOF in here. */
618 if (c == '\n')
619 {
620 /* Don't consider final '\n' to be part of comment. */
621 FORWARD(-1);
622 return;
623 }
624 else if (c == '\r')
625 {
626 /* \r cannot be a macro escape marker here. */
627 if (!ACTIVE_MARK_P (pfile))
628 CPP_BUMP_LINE (pfile);
ae79697b 629 if (CPP_OPTION (pfile, warn_comments))
45b966db
ZW
630 cpp_warning (pfile, "backslash-newline within line comment");
631 }
632 }
633}
634
635/* Skip a comment - C, C++, or Chill style. M is the first character
636 of the comment marker. If this really is a comment, skip to its
637 end and return ' '. If this is not a comment, return M (which will
638 be '/' or '-'). */
639
640static int
641skip_comment (pfile, m)
642 cpp_reader *pfile;
643 int m;
644{
645 if (m == '/' && PEEKC() == '*')
646 {
647 skip_block_comment (pfile);
648 return ' ';
649 }
650 else if (m == '/' && PEEKC() == '/')
651 {
652 if (CPP_BUFFER (pfile)->system_header_p)
653 {
654 /* We silently allow C++ comments in system headers, irrespective
655 of conformance mode, because lots of busted systems do that
656 and trying to clean it up in fixincludes is a nightmare. */
657 skip_line_comment (pfile);
658 return ' ';
659 }
ae79697b 660 else if (CPP_OPTION (pfile, cplusplus_comments))
45b966db 661 {
0f89df67 662 if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
45b966db 663 {
0f89df67
ZW
664 if (CPP_WTRADITIONAL (pfile))
665 cpp_pedwarn (pfile,
666 "C++ style comments are not allowed in traditional C");
667 else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
668 cpp_pedwarn (pfile,
669 "C++ style comments are not allowed in ISO C89");
670 if (CPP_WTRADITIONAL (pfile)
671 || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
672 cpp_pedwarn (pfile,
45b966db
ZW
673 "(this will be reported only once per input file)");
674 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
675 }
676 skip_line_comment (pfile);
677 return ' ';
678 }
679 else
680 return m;
681 }
682 else if (m == '-' && PEEKC() == '-'
ae79697b 683 && CPP_OPTION (pfile, chill))
45b966db
ZW
684 {
685 skip_line_comment (pfile);
686 return ' ';
687 }
688 else
689 return m;
690}
691
692/* Identical to skip_comment except that it copies the comment into the
693 token_buffer. This is used if !discard_comments. */
694static int
695copy_comment (pfile, m)
696 cpp_reader *pfile;
697 int m;
698{
699 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
700 const U_CHAR *limit;
701
702 if (skip_comment (pfile, m) == m)
703 return m;
704
705 limit = CPP_BUFFER (pfile)->cur;
706 CPP_RESERVE (pfile, limit - start + 2);
707 CPP_PUTC_Q (pfile, m);
708 for (; start <= limit; start++)
709 if (*start != '\r')
710 CPP_PUTC_Q (pfile, *start);
711
712 return ' ';
713}
714
64aaf407
NB
715static void
716null_warning (pfile, count)
717 cpp_reader *pfile;
718 unsigned int count;
719{
720 if (count == 1)
721 cpp_warning (pfile, "embedded null character ignored");
722 else
723 cpp_warning (pfile, "embedded null characters ignored");
724}
725
45b966db
ZW
726/* Skip whitespace \-newline and comments. Does not macro-expand. */
727
728void
729_cpp_skip_hspace (pfile)
730 cpp_reader *pfile;
731{
64aaf407 732 unsigned int null_count = 0;
45b966db 733 int c;
64aaf407 734
45b966db
ZW
735 while (1)
736 {
737 c = GETC();
738 if (c == EOF)
64aaf407 739 goto out;
45b966db
ZW
740 else if (is_hspace(c))
741 {
742 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
743 cpp_pedwarn (pfile, "%s in preprocessing directive",
744 c == '\f' ? "formfeed" : "vertical tab");
64aaf407
NB
745 else if (c == '\0')
746 null_count++;
45b966db
ZW
747 }
748 else if (c == '\r')
749 {
750 /* \r is a backslash-newline marker if !has_escapes, and
751 a deletable-whitespace or no-reexpansion marker otherwise. */
752 if (CPP_BUFFER (pfile)->has_escapes)
753 {
754 if (PEEKC() == ' ')
755 FORWARD(1);
756 else
757 break;
758 }
759 else
760 CPP_BUMP_LINE (pfile);
761 }
762 else if (c == '/' || c == '-')
763 {
764 c = skip_comment (pfile, c);
765 if (c != ' ')
766 break;
767 }
768 else
769 break;
770 }
771 FORWARD(-1);
64aaf407
NB
772 out:
773 if (null_count)
774 null_warning (pfile, null_count);
45b966db
ZW
775}
776
777/* Read and discard the rest of the current line. */
778
779void
780_cpp_skip_rest_of_line (pfile)
781 cpp_reader *pfile;
782{
783 for (;;)
784 {
785 int c = GETC();
786 switch (c)
787 {
788 case '\n':
789 FORWARD(-1);
790 case EOF:
791 return;
792
793 case '\r':
794 if (! CPP_BUFFER (pfile)->has_escapes)
795 CPP_BUMP_LINE (pfile);
796 break;
797
798 case '\'':
799 case '\"':
800 skip_string (pfile, c);
801 break;
802
803 case '/':
804 case '-':
805 skip_comment (pfile, c);
806 break;
807
808 case '\f':
809 case '\v':
810 if (CPP_PEDANTIC (pfile))
811 cpp_pedwarn (pfile, "%s in preprocessing directive",
812 c == '\f' ? "formfeed" : "vertical tab");
813 break;
814
815 }
816 }
817}
818
819/* Parse an identifier starting with C. */
820
821void
822_cpp_parse_name (pfile, c)
823 cpp_reader *pfile;
824 int c;
825{
826 for (;;)
827 {
828 if (! is_idchar(c))
829 {
830 FORWARD (-1);
831 break;
832 }
833
834 if (c == '$' && CPP_PEDANTIC (pfile))
835 cpp_pedwarn (pfile, "`$' in identifier");
836
837 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
838 CPP_PUTC_Q (pfile, c);
839 c = GETC();
840 if (c == EOF)
841 break;
842 }
45b966db
ZW
843 return;
844}
845
846/* Parse and skip over a string starting with C. A single quoted
847 string is treated like a double -- some programs (e.g., troff) are
848 perverse this way. (However, a single quoted string is not allowed
849 to extend over multiple lines.) */
850static void
851skip_string (pfile, c)
852 cpp_reader *pfile;
853 int c;
854{
3a2b2c7a 855 unsigned int start_line, start_column;
64aaf407 856 unsigned int null_count = 0;
45b966db 857
3a2b2c7a
ZW
858 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
859 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
45b966db
ZW
860 while (1)
861 {
862 int cc = GETC();
863 switch (cc)
864 {
865 case EOF:
866 cpp_error_with_line (pfile, start_line, start_column,
867 "unterminated string or character constant");
868 if (pfile->multiline_string_line != start_line
869 && pfile->multiline_string_line != 0)
870 cpp_error_with_line (pfile,
871 pfile->multiline_string_line, -1,
872 "possible real start of unterminated constant");
873 pfile->multiline_string_line = 0;
64aaf407 874 goto out;
45b966db 875
64aaf407
NB
876 case '\0':
877 null_count++;
878 break;
879
45b966db
ZW
880 case '\n':
881 CPP_BUMP_LINE (pfile);
882 /* In Fortran and assembly language, silently terminate
883 strings of either variety at end of line. This is a
884 kludge around not knowing where comments are in these
885 languages. */
ae79697b
ZW
886 if (CPP_OPTION (pfile, lang_fortran)
887 || CPP_OPTION (pfile, lang_asm))
45b966db
ZW
888 {
889 FORWARD(-1);
64aaf407 890 goto out;
45b966db
ZW
891 }
892 /* Character constants may not extend over multiple lines.
893 In Standard C, neither may strings. We accept multiline
894 strings as an extension. */
895 if (c == '\'')
896 {
897 cpp_error_with_line (pfile, start_line, start_column,
898 "unterminated character constant");
899 FORWARD(-1);
64aaf407 900 goto out;
45b966db
ZW
901 }
902 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
903 cpp_pedwarn_with_line (pfile, start_line, start_column,
904 "string constant runs past end of line");
905 if (pfile->multiline_string_line == 0)
906 pfile->multiline_string_line = start_line;
907 break;
908
909 case '\r':
910 if (CPP_BUFFER (pfile)->has_escapes)
911 {
912 cpp_ice (pfile, "\\r escape inside string constant");
913 FORWARD(1);
914 }
915 else
916 /* Backslash newline is replaced by nothing at all. */
917 CPP_BUMP_LINE (pfile);
918 break;
919
920 case '\\':
921 FORWARD(1);
922 break;
923
924 case '\"':
925 case '\'':
926 if (cc == c)
64aaf407 927 goto out;
45b966db
ZW
928 break;
929 }
930 }
64aaf407
NB
931
932 out:
933 if (null_count == 1)
934 cpp_warning (pfile, "null character in string or character constant");
935 else if (null_count > 1)
936 cpp_warning (pfile, "null characters in string or character constant");
45b966db
ZW
937}
938
939/* Parse a string and copy it to the output. */
940
941static void
942parse_string (pfile, c)
943 cpp_reader *pfile;
944 int c;
945{
946 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
947 const U_CHAR *limit;
948
949 skip_string (pfile, c);
950
951 limit = CPP_BUFFER (pfile)->cur;
952 CPP_RESERVE (pfile, limit - start + 2);
953 CPP_PUTC_Q (pfile, c);
954 for (; start < limit; start++)
955 if (*start != '\r')
956 CPP_PUTC_Q (pfile, *start);
957}
958
959/* Read an assertion into the token buffer, converting to
960 canonical form: `#predicate(a n swe r)' The next non-whitespace
961 character to read should be the first letter of the predicate.
962 Returns 0 for syntax error, 1 for bare predicate, 2 for predicate
963 with answer (see callers for why). In case of 0, an error has been
964 printed. */
965int
966_cpp_parse_assertion (pfile)
967 cpp_reader *pfile;
968{
969 int c, dropwhite;
970 _cpp_skip_hspace (pfile);
971 c = PEEKC();
bfab56e7
ZW
972 if (c == '\n')
973 {
974 cpp_error (pfile, "assertion without predicate");
975 return 0;
976 }
977 else if (! is_idstart(c))
45b966db
ZW
978 {
979 cpp_error (pfile, "assertion predicate is not an identifier");
980 return 0;
981 }
982 CPP_PUTC(pfile, '#');
983 FORWARD(1);
984 _cpp_parse_name (pfile, c);
985
986 c = PEEKC();
987 if (c != '(')
988 {
989 if (is_hspace(c) || c == '\r')
990 _cpp_skip_hspace (pfile);
991 c = PEEKC();
992 }
993 if (c != '(')
994 return 1;
995
996 CPP_PUTC(pfile, '(');
997 FORWARD(1);
998 dropwhite = 1;
999 while ((c = GETC()) != ')')
1000 {
1001 if (is_space(c))
1002 {
1003 if (! dropwhite)
1004 {
1005 CPP_PUTC(pfile, ' ');
1006 dropwhite = 1;
1007 }
1008 }
1009 else if (c == '\n' || c == EOF)
1010 {
1011 if (c == '\n') FORWARD(-1);
1012 cpp_error (pfile, "un-terminated assertion answer");
1013 return 0;
1014 }
1015 else if (c == '\r')
1016 /* \r cannot be a macro escape here. */
1017 CPP_BUMP_LINE (pfile);
1018 else
1019 {
1020 CPP_PUTC (pfile, c);
1021 dropwhite = 0;
1022 }
1023 }
1024
1025 if (pfile->limit[-1] == ' ')
1026 pfile->limit[-1] = ')';
1027 else if (pfile->limit[-1] == '(')
1028 {
1029 cpp_error (pfile, "empty token sequence in assertion");
1030 return 0;
1031 }
1032 else
1033 CPP_PUTC (pfile, ')');
1034
45b966db
ZW
1035 return 2;
1036}
1037
1038/* Get the next token, and add it to the text in pfile->token_buffer.
1039 Return the kind of token we got. */
1040
3a2b2c7a 1041enum cpp_ttype
45b966db
ZW
1042_cpp_lex_token (pfile)
1043 cpp_reader *pfile;
1044{
5eec0563 1045 register int c, c2;
3a2b2c7a 1046 enum cpp_ttype token;
45b966db 1047
f2d5f0cc
ZW
1048 if (CPP_BUFFER (pfile) == NULL)
1049 return CPP_EOF;
1050
45b966db
ZW
1051 get_next:
1052 c = GETC();
1053 switch (c)
1054 {
1055 case EOF:
1056 return CPP_EOF;
1057
1058 case '/':
1059 if (PEEKC () == '=')
1060 goto op2;
1061
1062 comment:
ae79697b 1063 if (CPP_OPTION (pfile, discard_comments))
45b966db
ZW
1064 c = skip_comment (pfile, c);
1065 else
1066 c = copy_comment (pfile, c);
1067 if (c != ' ')
1068 goto randomchar;
1069
1070 /* Comments are equivalent to spaces.
1071 For -traditional, a comment is equivalent to nothing. */
ff2b53ef 1072 if (!CPP_OPTION (pfile, discard_comments))
45b966db 1073 return CPP_COMMENT;
9e62c811 1074 else if (CPP_TRADITIONAL (pfile))
0f89df67
ZW
1075 {
1076 if (pfile->parsing_define_directive)
1077 return CPP_COMMENT;
1078 goto get_next;
1079 }
45b966db
ZW
1080 else
1081 {
1082 CPP_PUTC (pfile, c);
1083 return CPP_HSPACE;
1084 }
1085
1086 case '#':
5eec0563
JM
1087 CPP_PUTC (pfile, c);
1088
1089 hash:
45b966db
ZW
1090 if (pfile->parsing_if_directive)
1091 {
f2d5f0cc 1092 CPP_ADJUST_WRITTEN (pfile, -1);
bfab56e7
ZW
1093 if (_cpp_parse_assertion (pfile))
1094 return CPP_ASSERTION;
5eec0563 1095 return CPP_OTHER;
45b966db
ZW
1096 }
1097
9e62c811 1098 if (pfile->parsing_define_directive)
45b966db 1099 {
5eec0563
JM
1100 c2 = PEEKC ();
1101 if (c2 == '#')
1102 {
1103 FORWARD (1);
1104 CPP_PUTC (pfile, c2);
1105 }
1106 else if (c2 == '%' && PEEKN (1) == ':')
1107 {
1108 /* Digraph: "%:" == "#". */
1109 FORWARD (1);
1110 CPP_RESERVE (pfile, 2);
1111 CPP_PUTC_Q (pfile, c2);
1112 CPP_PUTC_Q (pfile, GETC ());
1113 }
1114 else
1368ee70 1115 return CPP_HASH;
5eec0563 1116
1368ee70 1117 return CPP_PASTE;
45b966db
ZW
1118 }
1119
1120 if (!pfile->only_seen_white)
5eec0563
JM
1121 return CPP_OTHER;
1122
1123 /* Remove the "#" or "%:" from the token buffer. */
1124 CPP_ADJUST_WRITTEN (pfile, (c == '#' ? -1 : -2));
45b966db
ZW
1125 return CPP_DIRECTIVE;
1126
1127 case '\"':
1128 case '\'':
1129 parse_string (pfile, c);
45b966db
ZW
1130 return c == '\'' ? CPP_CHAR : CPP_STRING;
1131
1132 case '$':
ae79697b 1133 if (!CPP_OPTION (pfile, dollars_in_ident))
45b966db
ZW
1134 goto randomchar;
1135 goto letter;
1136
1137 case ':':
5eec0563
JM
1138 c2 = PEEKC ();
1139 /* Digraph: ":>" == "]". */
1140 if (c2 == '>'
1141 || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
45b966db
ZW
1142 goto op2;
1143 goto randomchar;
1144
1145 case '&':
1146 case '+':
1147 case '|':
1148 c2 = PEEKC ();
1149 if (c2 == c || c2 == '=')
1150 goto op2;
1151 goto randomchar;
1152
5eec0563
JM
1153 case '%':
1154 /* Digraphs: "%:" == "#", "%>" == "}". */
1155 c2 = PEEKC ();
1156 if (c2 == ':')
1157 {
1158 FORWARD (1);
1159 CPP_RESERVE (pfile, 2);
1160 CPP_PUTC_Q (pfile, c);
1161 CPP_PUTC_Q (pfile, c2);
1162 goto hash;
1163 }
1164 else if (c2 == '>')
1165 {
1166 FORWARD (1);
1167 CPP_RESERVE (pfile, 2);
1168 CPP_PUTC_Q (pfile, c);
1169 CPP_PUTC_Q (pfile, c2);
1368ee70 1170 return CPP_OPEN_BRACE;
5eec0563
JM
1171 }
1172 /* else fall through */
1173
45b966db
ZW
1174 case '*':
1175 case '!':
45b966db
ZW
1176 case '=':
1177 case '^':
1178 if (PEEKC () == '=')
1179 goto op2;
1180 goto randomchar;
1181
1182 case '-':
1183 c2 = PEEKC ();
1184 if (c2 == '-')
1185 {
ae79697b 1186 if (CPP_OPTION (pfile, chill))
45b966db
ZW
1187 goto comment; /* Chill style comment */
1188 else
1189 goto op2;
1190 }
1191 else if (c2 == '=')
1192 goto op2;
1193 else if (c2 == '>')
1194 {
ae79697b 1195 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
45b966db
ZW
1196 {
1197 /* In C++, there's a ->* operator. */
1198 token = CPP_OTHER;
45b966db
ZW
1199 CPP_RESERVE (pfile, 4);
1200 CPP_PUTC_Q (pfile, c);
1201 CPP_PUTC_Q (pfile, GETC ());
1202 CPP_PUTC_Q (pfile, GETC ());
45b966db
ZW
1203 return token;
1204 }
1205 goto op2;
1206 }
1207 goto randomchar;
1208
1209 case '<':
1210 if (pfile->parsing_include_directive)
1211 {
1212 for (;;)
1213 {
1214 CPP_PUTC (pfile, c);
1215 if (c == '>')
1216 break;
1217 c = GETC ();
1218 if (c == '\n' || c == EOF)
1219 {
1220 cpp_error (pfile,
1221 "missing '>' in `#include <FILENAME>'");
1222 break;
1223 }
1224 else if (c == '\r')
1225 {
1226 if (!CPP_BUFFER (pfile)->has_escapes)
1227 {
1228 /* Backslash newline is replaced by nothing. */
1229 CPP_ADJUST_WRITTEN (pfile, -1);
1230 CPP_BUMP_LINE (pfile);
1231 }
1232 else
1233 {
1234 /* We might conceivably get \r- or \r<space> in
1235 here. Just delete 'em. */
1236 int d = GETC();
1237 if (d != '-' && d != ' ')
1238 cpp_ice (pfile, "unrecognized escape \\r%c", d);
1239 CPP_ADJUST_WRITTEN (pfile, -1);
1240 }
1241 }
1242 }
1243 return CPP_STRING;
1244 }
5eec0563
JM
1245 /* Digraphs: "<%" == "{", "<:" == "[". */
1246 c2 = PEEKC ();
1247 if (c2 == '%')
1248 {
1249 FORWARD (1);
1250 CPP_RESERVE (pfile, 2);
1251 CPP_PUTC_Q (pfile, c);
1252 CPP_PUTC_Q (pfile, c2);
1368ee70 1253 return CPP_CLOSE_BRACE;
5eec0563
JM
1254 }
1255 else if (c2 == ':')
1256 goto op2;
45b966db
ZW
1257 /* else fall through */
1258 case '>':
1259 c2 = PEEKC ();
1260 if (c2 == '=')
1261 goto op2;
1262 /* GNU C++ supports MIN and MAX operators <? and >?. */
ae79697b 1263 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
45b966db
ZW
1264 goto randomchar;
1265 FORWARD(1);
5eec0563
JM
1266 CPP_RESERVE (pfile, 3);
1267 CPP_PUTC_Q (pfile, c);
1268 CPP_PUTC_Q (pfile, c2);
1269 if (PEEKC () == '=')
45b966db 1270 CPP_PUTC_Q (pfile, GETC ());
45b966db
ZW
1271 return CPP_OTHER;
1272
1273 case '.':
1274 c2 = PEEKC ();
5eec0563 1275 if (ISDIGIT (c2))
45b966db 1276 {
5eec0563 1277 CPP_PUTC (pfile, c);
45b966db
ZW
1278 c = GETC ();
1279 goto number;
1280 }
1281
1282 /* In C++ there's a .* operator. */
ae79697b 1283 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
45b966db
ZW
1284 goto op2;
1285
1286 if (c2 == '.' && PEEKN(1) == '.')
1287 {
5eec0563 1288 CPP_RESERVE (pfile, 3);
45b966db
ZW
1289 CPP_PUTC_Q (pfile, '.');
1290 CPP_PUTC_Q (pfile, '.');
1291 CPP_PUTC_Q (pfile, '.');
1292 FORWARD (2);
1368ee70 1293 return CPP_ELLIPSIS;
45b966db
ZW
1294 }
1295 goto randomchar;
1296
1297 op2:
5eec0563 1298 CPP_RESERVE (pfile, 2);
45b966db
ZW
1299 CPP_PUTC_Q (pfile, c);
1300 CPP_PUTC_Q (pfile, GETC ());
5eec0563 1301 return CPP_OTHER;
45b966db
ZW
1302
1303 case 'L':
1304 c2 = PEEKC ();
1305 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1306 {
1307 CPP_PUTC (pfile, c);
1308 c = GETC ();
1309 parse_string (pfile, c);
45b966db
ZW
1310 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1311 }
1312 goto letter;
1313
1314 case '0': case '1': case '2': case '3': case '4':
1315 case '5': case '6': case '7': case '8': case '9':
1316 number:
1317 c2 = '.';
1318 for (;;)
1319 {
1320 CPP_RESERVE (pfile, 2);
1321 CPP_PUTC_Q (pfile, c);
1322 c = PEEKC ();
1323 if (c == EOF)
1324 break;
1325 if (!is_numchar(c) && c != '.'
1326 && ((c2 != 'e' && c2 != 'E'
1327 && ((c2 != 'p' && c2 != 'P')
ae79697b 1328 || CPP_OPTION (pfile, c89)))
45b966db
ZW
1329 || (c != '+' && c != '-')))
1330 break;
1331 FORWARD(1);
1332 c2= c;
1333 }
45b966db
ZW
1334 return CPP_NUMBER;
1335 case 'b': case 'c': case 'd': case 'h': case 'o':
1336 case 'B': case 'C': case 'D': case 'H': case 'O':
ae79697b 1337 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
45b966db 1338 {
45b966db
ZW
1339 CPP_RESERVE (pfile, 2);
1340 CPP_PUTC_Q (pfile, c);
1341 CPP_PUTC_Q (pfile, '\'');
1342 FORWARD(1);
1343 for (;;)
1344 {
1345 c = GETC();
1346 if (c == EOF)
1347 goto chill_number_eof;
1348 if (!is_numchar(c))
1349 break;
1350 CPP_PUTC (pfile, c);
1351 }
1352 if (c == '\'')
1353 {
1354 CPP_RESERVE (pfile, 2);
1355 CPP_PUTC_Q (pfile, c);
45b966db
ZW
1356 return CPP_STRING;
1357 }
1358 else
1359 {
1360 FORWARD(-1);
1361 chill_number_eof:
45b966db
ZW
1362 return CPP_NUMBER;
1363 }
1364 }
1365 else
1366 goto letter;
1367 case '_':
1368 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1369 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1370 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1371 case 'x': case 'y': case 'z':
1372 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1373 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1374 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1375 case 'Y': case 'Z':
1376 letter:
45b966db
ZW
1377 _cpp_parse_name (pfile, c);
1378 return CPP_MACRO;
1379
64aaf407
NB
1380 case ' ': case '\t': case '\v': case '\f': case '\0':
1381 {
1382 int null_count = 0;
1383
1384 for (;;)
1385 {
1386 if (c == '\0')
1387 null_count++;
1388 else
1389 CPP_PUTC (pfile, c);
1390 c = PEEKC ();
1391 if (c == EOF || !is_hspace(c))
1392 break;
1393 FORWARD(1);
1394 }
1395 if (null_count)
1396 null_warning (pfile, null_count);
1397 return CPP_HSPACE;
1398 }
45b966db
ZW
1399
1400 case '\r':
1401 if (CPP_BUFFER (pfile)->has_escapes)
1402 {
1403 c = GETC ();
1404 if (c == '-')
1405 {
1406 if (pfile->output_escapes)
1407 CPP_PUTS (pfile, "\r-", 2);
1408 _cpp_parse_name (pfile, GETC ());
1409 return CPP_NAME;
1410 }
1411 else if (c == ' ')
1412 {
ff2b53ef
ZW
1413 /* "\r " means a space, but only if necessary to prevent
1414 accidental token concatenation. */
45b966db
ZW
1415 CPP_RESERVE (pfile, 2);
1416 if (pfile->output_escapes)
1417 CPP_PUTC_Q (pfile, '\r');
1418 CPP_PUTC_Q (pfile, c);
1419 return CPP_HSPACE;
1420 }
1421 else
1422 {
1423 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1424 goto get_next;
1425 }
1426 }
1427 else
1428 {
1429 /* Backslash newline is ignored. */
cbccf5e8
MM
1430 if (!ACTIVE_MARK_P (pfile))
1431 CPP_BUMP_LINE (pfile);
45b966db
ZW
1432 goto get_next;
1433 }
1434
1435 case '\n':
1436 CPP_PUTC (pfile, c);
45b966db
ZW
1437 return CPP_VSPACE;
1438
1368ee70
ZW
1439 case '(': token = CPP_OPEN_PAREN; goto char1;
1440 case ')': token = CPP_CLOSE_PAREN; goto char1;
1441 case '{': token = CPP_OPEN_BRACE; goto char1;
1442 case '}': token = CPP_CLOSE_BRACE; goto char1;
1443 case ',': token = CPP_COMMA; goto char1;
1444 case ';': token = CPP_SEMICOLON; goto char1;
45b966db
ZW
1445
1446 randomchar:
1447 default:
1448 token = CPP_OTHER;
1449 char1:
45b966db
ZW
1450 CPP_PUTC (pfile, c);
1451 return token;
1452 }
1453}
1454
1455/* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1456 Caller is expected to have checked no_macro_expand. */
1457static int
1458maybe_macroexpand (pfile, written)
1459 cpp_reader *pfile;
1460 long written;
1461{
1462 U_CHAR *macro = pfile->token_buffer + written;
1463 size_t len = CPP_WRITTEN (pfile) - written;
1464 HASHNODE *hp = _cpp_lookup (pfile, macro, len);
1465
1466 if (!hp)
1467 return 0;
d9e0bd53 1468 if (hp->disabled || hp->type == T_IDENTITY)
45b966db
ZW
1469 {
1470 if (pfile->output_escapes)
1471 {
1472 /* Insert a no-reexpand marker before IDENT. */
1473 CPP_RESERVE (pfile, 2);
1474 CPP_ADJUST_WRITTEN (pfile, 2);
1475 macro = pfile->token_buffer + written;
1476
1477 memmove (macro + 2, macro, len);
1478 macro[0] = '\r';
1479 macro[1] = '-';
1480 }
1481 return 0;
1482 }
ff2b53ef
ZW
1483 if (hp->type == T_EMPTY)
1484 {
1485 /* Special case optimization: macro expands to nothing. */
1486 CPP_SET_WRITTEN (pfile, written);
1487 CPP_PUTC_Q (pfile, ' ');
1488 return 1;
1489 }
45b966db
ZW
1490
1491 /* If macro wants an arglist, verify that a '(' follows. */
d9e0bd53 1492 if (hp->type == T_FMACRO)
45b966db
ZW
1493 {
1494 int macbuf_whitespace = 0;
1495 int c;
1496
1497 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1498 {
1499 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1500 for (;;)
1501 {
1502 _cpp_skip_hspace (pfile);
1503 c = PEEKC ();
1504 if (c == '\n')
1505 FORWARD(1);
1506 else
1507 break;
1508 }
1509 if (point != CPP_BUFFER (pfile)->cur)
1510 macbuf_whitespace = 1;
1511 if (c == '(')
1512 goto is_macro_call;
1513 else if (c != EOF)
1514 goto not_macro_call;
1515 cpp_pop_buffer (pfile);
1516 }
1517
1518 CPP_SET_MARK (pfile);
1519 for (;;)
1520 {
1521 _cpp_skip_hspace (pfile);
1522 c = PEEKC ();
1523 if (c == '\n')
1524 FORWARD(1);
1525 else
1526 break;
1527 }
1528 CPP_GOTO_MARK (pfile);
1529
1530 if (c != '(')
1531 {
1532 not_macro_call:
1533 if (macbuf_whitespace)
1534 CPP_PUTC (pfile, ' ');
1535 return 0;
1536 }
1537 }
1538
1539 is_macro_call:
1540 /* This is now known to be a macro call.
1541 Expand the macro, reading arguments as needed,
1542 and push the expansion on the input stack. */
1543 _cpp_macroexpand (pfile, hp);
1544 CPP_SET_WRITTEN (pfile, written);
1545 return 1;
1546}
1547
9e62c811
ZW
1548/* Complain about \v or \f in a preprocessing directive (constraint
1549 violation, C99 6.10 para 5). Caller has checked CPP_PEDANTIC. */
1550static void
1551pedantic_whitespace (pfile, p, len)
1552 cpp_reader *pfile;
1553 U_CHAR *p;
1554 unsigned int len;
1555{
1556 while (len)
1557 {
1558 if (*p == '\v')
1559 cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1560 else if (*p == '\f')
1561 cpp_pedwarn (pfile, "form feed in preprocessing directive");
1562 p++;
1563 len--;
1564 }
1565}
1566
1567
3a2b2c7a 1568enum cpp_ttype
45b966db
ZW
1569cpp_get_token (pfile)
1570 cpp_reader *pfile;
1571{
3a2b2c7a 1572 enum cpp_ttype token;
45b966db
ZW
1573 long written = CPP_WRITTEN (pfile);
1574
1575 get_next:
1576 token = _cpp_lex_token (pfile);
1577
1578 switch (token)
1579 {
1580 default:
ff2b53ef
ZW
1581 pfile->potential_control_macro = 0;
1582 pfile->only_seen_white = 0;
1583 return token;
1584
1585 case CPP_VSPACE:
1586 if (pfile->only_seen_white == 0)
1587 pfile->only_seen_white = 1;
1588 CPP_BUMP_LINE (pfile);
ff2b53ef
ZW
1589 return token;
1590
1591 case CPP_HSPACE:
1592 case CPP_COMMENT:
45b966db
ZW
1593 return token;
1594
1595 case CPP_DIRECTIVE:
ff2b53ef 1596 pfile->potential_control_macro = 0;
45b966db
ZW
1597 if (_cpp_handle_directive (pfile))
1598 return CPP_DIRECTIVE;
1599 pfile->only_seen_white = 0;
1600 CPP_PUTC (pfile, '#');
1601 return CPP_OTHER;
1602
1603 case CPP_MACRO:
ff2b53ef
ZW
1604 pfile->potential_control_macro = 0;
1605 pfile->only_seen_white = 0;
45b966db
ZW
1606 if (! pfile->no_macro_expand
1607 && maybe_macroexpand (pfile, written))
1608 goto get_next;
1609 return CPP_NAME;
1610
1611 case CPP_EOF:
f2d5f0cc
ZW
1612 if (CPP_BUFFER (pfile) == NULL)
1613 return CPP_EOF;
45b966db
ZW
1614 if (CPP_BUFFER (pfile)->manual_pop)
1615 /* If we've been reading from redirected input, the
1616 frontend will pop the buffer. */
1617 return CPP_EOF;
45b966db 1618
f2d5f0cc
ZW
1619 if (CPP_BUFFER (pfile)->seen_eof)
1620 {
45b966db
ZW
1621 cpp_pop_buffer (pfile);
1622 goto get_next;
1623 }
1624 else
1625 {
1626 _cpp_handle_eof (pfile);
1627 return CPP_POP;
1628 }
1629 }
1630}
1631
1632/* Like cpp_get_token, but skip spaces and comments. */
1633
3a2b2c7a 1634enum cpp_ttype
45b966db
ZW
1635cpp_get_non_space_token (pfile)
1636 cpp_reader *pfile;
1637{
1638 int old_written = CPP_WRITTEN (pfile);
1639 for (;;)
1640 {
3a2b2c7a 1641 enum cpp_ttype token = cpp_get_token (pfile);
ff2b53ef 1642 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
45b966db
ZW
1643 return token;
1644 CPP_SET_WRITTEN (pfile, old_written);
1645 }
1646}
1647
ff2b53ef 1648/* Like cpp_get_token, except that it does not execute directives,
9e62c811
ZW
1649 does not consume vertical space, discards horizontal space, and
1650 automatically pops off macro buffers. */
3a2b2c7a 1651enum cpp_ttype
9e62c811 1652_cpp_get_directive_token (pfile)
45b966db
ZW
1653 cpp_reader *pfile;
1654{
ff2b53ef 1655 long old_written;
3a2b2c7a 1656 enum cpp_ttype token;
45b966db 1657
ff2b53ef
ZW
1658 get_next:
1659 old_written = CPP_WRITTEN (pfile);
1660 token = _cpp_lex_token (pfile);
1661 switch (token)
45b966db 1662 {
ff2b53ef
ZW
1663 default:
1664 return token;
45b966db 1665
ff2b53ef
ZW
1666 case CPP_VSPACE:
1667 /* Put it back and return VSPACE. */
1668 FORWARD(-1);
1669 CPP_ADJUST_WRITTEN (pfile, -1);
1670 return CPP_VSPACE;
45b966db 1671
ff2b53ef
ZW
1672 case CPP_HSPACE:
1673 if (CPP_PEDANTIC (pfile))
9e62c811
ZW
1674 pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1675 CPP_WRITTEN (pfile) - old_written);
1676 CPP_SET_WRITTEN (pfile, old_written);
1677 goto get_next;
ff2b53ef 1678 return CPP_HSPACE;
45b966db 1679
ff2b53ef
ZW
1680 case CPP_DIRECTIVE:
1681 /* Don't execute the directive, but don't smash it to OTHER either. */
1682 CPP_PUTC (pfile, '#');
1683 return CPP_DIRECTIVE;
1684
1685 case CPP_MACRO:
1686 if (! pfile->no_macro_expand
1687 && maybe_macroexpand (pfile, old_written))
1688 goto get_next;
1689 return CPP_NAME;
45b966db 1690
ff2b53ef
ZW
1691 case CPP_EOF:
1692 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
45b966db 1693 {
ff2b53ef
ZW
1694 cpp_pop_buffer (pfile);
1695 goto get_next;
45b966db 1696 }
ff2b53ef
ZW
1697 else
1698 /* This can happen for files that don't end with a newline,
1699 and for cpp_define and friends. Pretend they do, so
1700 callers don't have to deal. A warning will be issued by
1701 someone else, if necessary. */
1702 return CPP_VSPACE;
1703 }
1704}
1705
45b966db
ZW
1706/* Determine the current line and column. Used only by read_and_prescan. */
1707static U_CHAR *
1708find_position (start, limit, linep)
1709 U_CHAR *start;
1710 U_CHAR *limit;
1711 unsigned long *linep;
1712{
1713 unsigned long line = *linep;
1714 U_CHAR *lbase = start;
1715 while (start < limit)
1716 {
1717 U_CHAR ch = *start++;
1718 if (ch == '\n' || ch == '\r')
1719 {
1720 line++;
1721 lbase = start;
1722 }
1723 }
1724 *linep = line;
1725 return lbase;
1726}
1727
2a87fbe8
ZW
1728/* The following table is used by _cpp_read_and_prescan. If we have
1729 designated initializers, it can be constant data; otherwise, it is
1730 set up at runtime by _cpp_init_input_buffer. */
46d07497
ZW
1731
1732#ifndef UCHAR_MAX
1733#define UCHAR_MAX 255 /* assume 8-bit bytes */
1734#endif
1735
/* Three helper macros let one table body serve both configurations:
   CHARTAB opens the table, s(p, v) records that entry P has value V,
   and END closes it.  In the first branch the body is a designated
   initializer of a const array and init_chartab() expands to nothing.
   In the second branch CHARTAB defines a zero-filled array and opens a
   function init_chartab whose body is the s() assignments through x;
   END closes that function.  */
1736#if (GCC_VERSION >= 2007) || (__STDC_VERSION__ >= 199901L)
2a87fbe8
ZW
1737#define init_chartab() /* nothing */
1738#define CHARTAB static const unsigned char chartab[UCHAR_MAX + 1] = {
46d07497
ZW
1739#define END };
1740#define s(p, v) [p] = v,
1741#else
2a87fbe8
ZW
1742#define CHARTAB static unsigned char chartab[UCHAR_MAX + 1] = { 0 }; \
1743 static void init_chartab PARAMS ((void)) { \
1744 unsigned char *x = chartab;
46d07497
ZW
1745#define END }
1746#define s(p, v) x[p] = v;
1747#endif
1748
1749/* Table of characters that can't be handled in the inner loop.
2a87fbe8
ZW
1750 Also contains the mapping between trigraph third characters and their
1751 replacements. */
46d07497
ZW
1752#define SPECCASE_CR 1
1753#define SPECCASE_BACKSLASH 2
1754#define SPECCASE_QUESTION 3
1755
/* The first three entries mark the characters that need special
   handling with the small SPECCASE_* codes.  The remaining nine map
   the third character of a trigraph to its replacement character.
   Every entry not listed here is zero.  */
2a87fbe8 1756CHARTAB
46d07497
ZW
1757 s('\r', SPECCASE_CR)
1758 s('\\', SPECCASE_BACKSLASH)
1759 s('?', SPECCASE_QUESTION)
46d07497 1760
46d07497
ZW
1761 s('=', '#') s(')', ']') s('!', '|')
1762 s('(', '[') s('\'', '^') s('>', '}')
1763 s('/', '\\') s('<', '{') s('-', '~')
1764END
1765
1766#undef CHARTAB
46d07497
ZW
1767#undef END
1768#undef s
1769
2a87fbe8
ZW
/* NORMAL (c) takes a character: it is true when the table entry for C
   is zero or is a trigraph replacement (greater than
   SPECCASE_QUESTION), i.e. when C is not one of the three special
   characters.  Note that C is evaluated twice.
   NONTRI (c) takes a value already fetched from chartab: it is true
   when that value is zero or a SPECCASE_* code, i.e. when the
   character it was fetched for has no trigraph replacement.  */
1770#define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
1771#define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1772
45b966db
ZW
1773/* Read the entire contents of file DESC into buffer BUF. LEN is how
1774 much memory to allocate initially; more will be allocated if
1775 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1776 canonical form (\n). If enabled, convert and/or warn about
1777 trigraphs. Convert backslash-newline to a one-character escape
1778 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1779 token). If there is no newline at the end of the file, add one and
1780 warn. Returns -1 on failure, or the actual length of the data to
1781 be scanned.
1782
1783 This function does a lot of work, and can be a serious performance
1784 bottleneck. It has been tuned heavily; make sure you understand it
1785 before hacking. The common case - no trigraphs, Unix style line
1786 breaks, backslash-newline set off by whitespace, newline at EOF -
1787 has been optimized at the expense of the others. The performance
1788 penalty for DOS style line breaks (\r\n) is about 15%.
1789
1790 Warnings lose particularly heavily since we have to determine the
1791 line number, which involves scanning from the beginning of the file
1792 or from the last warning. The penalty for the absence of a newline
1793 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1794
1795 If your file has more than one kind of end-of-line marker, you
04e3ec78
NB
1796 will get messed-up line numbering.
1797
1798 So that the cases of the switch statement do not have to concern
1799 themselves with the complications of reading beyond the end of the
1800 buffer, the buffer is guaranteed to have at least 3 characters in
1801 it (or however many are left in the file, if less) on entry to the
1802 switch. This is enough to handle trigraphs and the "\\\n\r" and
1803 "\\\r\n" cases.
1804
1805 The end of the buffer is marked by a '\\', which, being a special
1806 character, guarantees we will exit the fast-scan loops and perform
1807 a refill. */
46d07497 1808
45b966db
ZW
1809long
1810_cpp_read_and_prescan (pfile, fp, desc, len)
1811 cpp_reader *pfile;
1812 cpp_buffer *fp;
1813 int desc;
1814 size_t len;
1815{
1816 U_CHAR *buf = (U_CHAR *) xmalloc (len);
1817 U_CHAR *ip, *op, *line_base;
1818 U_CHAR *ibase;
45b966db
ZW
1819 unsigned long line;
1820 unsigned int deferred_newlines;
45b966db 1821 size_t offset;
04e3ec78 1822 int count = 0;
45b966db
ZW
1823
1824 offset = 0;
04e3ec78 1825 deferred_newlines = 0;
45b966db
ZW
1826 op = buf;
1827 line_base = buf;
1828 line = 1;
04e3ec78
NB
1829 ibase = pfile->input_buffer + 3;
1830 ip = ibase;
1831 ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
45b966db
ZW
1832
1833 for (;;)
1834 {
04e3ec78
NB
1835 U_CHAR *near_buff_end;
1836
04e3ec78 1837 count = read (desc, ibase, pfile->input_buffer_len);
45b966db
ZW
1838 if (count < 0)
1839 goto error;
04e3ec78
NB
1840
1841 ibase[count] = '\\'; /* Marks end of buffer */
1842 if (count)
45b966db 1843 {
04e3ec78
NB
1844 near_buff_end = pfile->input_buffer + count;
1845 offset += count;
45b966db 1846 if (offset > len)
04e3ec78
NB
1847 {
1848 size_t delta_op;
1849 size_t delta_line_base;
1b955cba 1850 len = offset * 2;
04e3ec78
NB
1851 if (offset > len)
1852 /* len overflowed.
1853 This could happen if the file is larger than half the
1854 maximum address space of the machine. */
1855 goto too_big;
1856
1857 delta_op = op - buf;
1858 delta_line_base = line_base - buf;
1859 buf = (U_CHAR *) xrealloc (buf, len);
1860 op = buf + delta_op;
1861 line_base = buf + delta_line_base;
1862 }
1863 }
1864 else
1865 {
1866 if (ip == ibase)
1867 break;
1868 /* Allow normal processing of the (at most 2) remaining
1869 characters. The end-of-buffer marker is still present
1870 and prevents false matches within the switch. */
1871 near_buff_end = ibase - 1;
45b966db
ZW
1872 }
1873
1874 for (;;)
1875 {
04e3ec78 1876 unsigned int span;
45b966db 1877
04e3ec78 1878 /* Deal with \-newline, potentially in the middle of a token. */
45b966db
ZW
1879 if (deferred_newlines)
1880 {
2a87fbe8 1881 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
04e3ec78
NB
1882 {
1883 /* Previous was not white space. Skip to white
1884 space, if we can, before outputting the \r's */
1885 span = 0;
1886 while (ip[span] != ' '
1887 && ip[span] != '\t'
1888 && ip[span] != '\n'
2a87fbe8 1889 && NORMAL(ip[span]))
04e3ec78
NB
1890 span++;
1891 memcpy (op, ip, span);
1892 op += span;
1893 ip += span;
2a87fbe8 1894 if (! NORMAL(ip[0]))
04e3ec78
NB
1895 goto do_speccase;
1896 }
1897 while (deferred_newlines)
1898 deferred_newlines--, *op++ = '\r';
45b966db
ZW
1899 }
1900
1901 /* Copy as much as we can without special treatment. */
04e3ec78 1902 span = 0;
2a87fbe8 1903 while (NORMAL (ip[span])) span++;
45b966db
ZW
1904 memcpy (op, ip, span);
1905 op += span;
1906 ip += span;
1907
04e3ec78
NB
1908 do_speccase:
1909 if (ip > near_buff_end) /* Do we have enough chars? */
1910 break;
2a87fbe8 1911 switch (chartab[*ip++])
45b966db 1912 {
45b966db 1913 case SPECCASE_CR: /* \r */
04e3ec78 1914 if (ip[-2] != '\n')
45b966db 1915 {
04e3ec78
NB
1916 if (*ip == '\n')
1917 ip++;
1918 *op++ = '\n';
45b966db 1919 }
45b966db
ZW
1920 break;
1921
1922 case SPECCASE_BACKSLASH: /* \ */
04e3ec78 1923 if (*ip == '\n')
45b966db 1924 {
04e3ec78 1925 deferred_newlines++;
45b966db
ZW
1926 ip++;
1927 if (*ip == '\r') ip++;
45b966db
ZW
1928 }
1929 else if (*ip == '\r')
1930 {
04e3ec78 1931 deferred_newlines++;
45b966db
ZW
1932 ip++;
1933 if (*ip == '\n') ip++;
45b966db
ZW
1934 }
1935 else
1936 *op++ = '\\';
04e3ec78 1937 break;
45b966db
ZW
1938
1939 case SPECCASE_QUESTION: /* ? */
1940 {
1941 unsigned int d, t;
04e3ec78
NB
1942
1943 *op++ = '?'; /* Normal non-trigraph case */
1944 if (ip[0] != '?')
1945 break;
1946
45b966db 1947 d = ip[1];
2a87fbe8
ZW
1948 t = chartab[d];
1949 if (NONTRI (t))
04e3ec78 1950 break;
45b966db 1951
ae79697b 1952 if (CPP_OPTION (pfile, warn_trigraphs))
45b966db
ZW
1953 {
1954 unsigned long col;
1955 line_base = find_position (line_base, op, &line);
1956 col = op - line_base + 1;
ae79697b 1957 if (CPP_OPTION (pfile, trigraphs))
45b966db 1958 cpp_warning_with_line (pfile, line, col,
04e3ec78 1959 "trigraph ??%c converted to %c", d, t);
45b966db
ZW
1960 else
1961 cpp_warning_with_line (pfile, line, col,
04e3ec78 1962 "trigraph ??%c ignored", d);
45b966db 1963 }
04e3ec78
NB
1964
1965 ip += 2;
ae79697b 1966 if (CPP_OPTION (pfile, trigraphs))
45b966db 1967 {
04e3ec78 1968 op[-1] = t; /* Overwrite '?' */
45b966db 1969 if (t == '\\')
04e3ec78
NB
1970 {
1971 op--;
1972 *--ip = '\\';
1973 goto do_speccase; /* May need buffer refill */
1974 }
45b966db
ZW
1975 }
1976 else
1977 {
45b966db
ZW
1978 *op++ = '?';
1979 *op++ = d;
1980 }
1981 }
04e3ec78 1982 break;
45b966db
ZW
1983 }
1984 }
f6fab919
ZW
1985 /* Copy previous char plus unprocessed (at most 2) chars
1986 to beginning of buffer, refill it with another
1987 read(), and continue processing */
1988 memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
1989 ip -= count;
45b966db
ZW
1990 }
1991
1992 if (offset == 0)
1993 return 0;
1994
45b966db
ZW
1995 if (op[-1] != '\n')
1996 {
1997 unsigned long col;
1998 line_base = find_position (line_base, op, &line);
1999 col = op - line_base + 1;
f6fab919 2000 cpp_warning_with_line (pfile, line, col, "no newline at end of file");
45b966db
ZW
2001 if (offset + 1 > len)
2002 {
2003 len += 1;
2004 if (offset + 1 > len)
2005 goto too_big;
2006 buf = (U_CHAR *) xrealloc (buf, len);
2007 op = buf + offset;
2008 }
2009 *op++ = '\n';
2010 }
2011
2012 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
2013 return op - buf;
2014
2015 too_big:
f6fab919
ZW
2016 cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
2017 (unsigned long)offset);
45b966db
ZW
2018 free (buf);
2019 return -1;
2020
2021 error:
2022 cpp_error_from_errno (pfile, fp->ihash->name);
2023 free (buf);
2024 return -1;
2025}
2026
2a87fbe8
ZW
2027/* Allocate pfile->input_buffer, and initialize chartab[]
2028 if it hasn't happened already. */
46d07497 2029
45b966db
ZW
2030void
2031_cpp_init_input_buffer (pfile)
2032 cpp_reader *pfile;
2033{
2034 U_CHAR *tmp;
2035
2a87fbe8 2036 init_chartab ();
9e62c811 2037 init_token_list (pfile, &pfile->directbuf, 0);
04e3ec78 2038
45b966db
ZW
2039 /* Determine the appropriate size for the input buffer. Normal C
2040 source files are smaller than eight K. */
04e3ec78
NB
2041 /* 8Kbytes of buffer proper, 1 to detect running off the end without
2042 address arithmetic all the time, and 3 for pushback during buffer
2043 refill, in case there's a potential trigraph or end-of-line
2044 digraph at the end of a block. */
45b966db 2045
04e3ec78 2046 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
45b966db
ZW
2047 pfile->input_buffer = tmp;
2048 pfile->input_buffer_len = 8192;
2049}
c5a04734
ZW
2050
2051#if 0
2052
d6d5f795
NB
2053/* Lexing algorithm.
2054
2055 The original lexer in cpplib was made up of two passes: a first pass
2056 that replaced trigraphs and deleted escaped newlines, and a second
2057 pass that tokenized the result of the first pass. Tokenisation was
2058 performed by peeking at the next character in the input stream. For
6777db6d 2059 example, if the input stream contained "!=", the handler for the !
d6d5f795 2060 character would peek at the next character, and if it were a '='
6777db6d
NB
2061 would skip over it, and return a "!=" token, otherwise it would
2062 return just the "!" token.
d6d5f795
NB
2063
2064 To implement a single-pass lexer, this peeking ahead is unworkable.
2065 An arbitrary number of escaped newlines, and trigraphs (in particular
6777db6d
NB
2066 ??/ which translates to the escape \), could separate the '!' and '='
2067 in the input stream, yet the next token is still a "!=".
d6d5f795
NB
2068
2069 Suppose instead that we lex by one logical line at a time, producing
6777db6d
NB
2070 a token list or stack for each logical line, and when seeing the '!'
2071 push a CPP_NOT token on the list. Then if the '!' is part of a
2072 longer token ("!=") we know we must see the remainder of the token by
2073 the time we reach the end of the logical line. Thus we can have the
2074 '=' handler look at the previous token (at the end of the list / top
2075 of the stack) and see if it is a "!" token, and if so, instead of
2076 pushing a "=" token revise the existing token to be a "!=" token.
d6d5f795
NB
2077
2078 This works in the presence of escaped newlines, because the '\' would
2079 have been pushed on the top of the stack as a CPP_BACKSLASH. The
2080 newline ('\n' or '\r') handler looks at the token at the top of the
2081 stack to see if it is a CPP_BACKSLASH, and if so discards both.
2082 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
2083 the '=' handler would never see any intervening escaped newlines.
2084
2085 To make trigraphs work in this context, as in precedence trigraphs
2086 are highest and converted before anything else, the '?' handler does
2087 lookahead to see if it is a trigraph, and if so skips the trigraph
2088 and pushes the token it represents onto the top of the stack. This
2089 also works in the particular case of a CPP_BACKSLASH trigraph.
2090
2091 To the preprocessor, whitespace is only significant to the point of
2092 knowing whether whitespace precedes a particular token. For example,
2093 the '=' handler needs to know whether there was whitespace between it
6777db6d 2094 and a "!" token on the top of the stack, to make the token conversion
d6d5f795
NB
2095 decision correctly. So each token has a PREV_WHITESPACE flag to
2096 indicate this - the standard permits consecutive whitespace to be
2097 regarded as a single space. The compiler front ends are not
2098 interested in whitespace at all; they just require a token stream.
2099 Another place where whitespace is significant to the preprocessor is
2100 a #define statement - if there is whitespace between the macro name
2101 and an initial "(" token the macro is "object-like", otherwise it is
2102 a function-like macro that takes arguments.
2103
2104 However, all is not rosy. Parsing of identifiers, numbers, comments
2105 and strings becomes trickier because of the possibility of raw
2106 trigraphs and escaped newlines in the input stream.
2107
2108 The trigraphs are three consecutive characters beginning with two
c2e25d51
NB
2109 question marks. A question mark is not valid as part of a number or
2110 identifier, so parsing of a number or identifier terminates normally
2111 upon reaching it, returning to the mainloop which handles the
2112 trigraph just like it would in any other position. Similarly for the
2113 backslash of a backslash-newline combination. So we just need the
2114 escaped-newline dropper in the mainloop to check if the token on the
2115 top of the stack after dropping the escaped newline is a number or
2116 identifier, and if so to continue processing it as if nothing had
2117 happened.
d6d5f795
NB
2118
2119 For strings, we replace trigraphs whenever we reach a quote or
2120 newline, because there might be a backslash trigraph escaping them.
2121 We need to be careful that we start trigraph replacing from where we
2122 left off previously, because it is possible for a first scan to leave
2123 "fake" trigraphs that a second scan would pick up as real (e.g. the
c2e25d51 2124 sequence "????/\n=" would find a fake ??= trigraph after removing the
d6d5f795
NB
2125 escaped newline.)
2126
2127 For line comments, on reaching a newline we scan the previous
2128 character(s) to see if it is escaped, and continue if it is. Block
2129 comments ignore everything and just focus on finding the comment
2130 termination mark. The only difficult thing, and it is surprisingly
2131 tricky, is checking if an asterisk precedes the final slash since
2132 they could be separated by escaped newlines. If the preprocessor is
2133 invoked with the output comments option, we don't bother removing
2134 escaped newlines and replacing trigraphs for output.
2135
2136 Finally, numbers can begin with a period, which is pushed initially
2137 as a CPP_DOT token in its own right. The digit handler checks if the
2138 previous token was a CPP_DOT not separated by whitespace, and if so
2139 pops it off the stack and pushes a period into the number's buffer
2140 before calling the number parser.
2141
2142*/
2143
c5a04734
ZW
2144static void expand_comment_space PARAMS ((cpp_toklist *));
2145void init_trigraph_map PARAMS ((void));
2146static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
2147 unsigned char *));
2148static const unsigned char *backslash_start PARAMS ((cpp_reader *,
2149 const unsigned char *));
2150static int skip_block_comment PARAMS ((cpp_reader *));
2151static int skip_line_comment PARAMS ((cpp_reader *));
2152static void skip_whitespace PARAMS ((cpp_reader *, int));
2153static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
2154static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
2155static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
2156 unsigned int));
2157static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
2158static void copy_comment PARAMS ((cpp_toklist *, const unsigned char *,
2159 unsigned int, unsigned int, unsigned int));
2160void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
2161
2162static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
2163
c5a04734
ZW
2164unsigned int spell_string PARAMS ((unsigned char *, cpp_toklist *,
2165 cpp_token *token));
2166unsigned int spell_comment PARAMS ((unsigned char *, cpp_toklist *,
2167 cpp_token *token));
2168unsigned int spell_name PARAMS ((unsigned char *, cpp_toklist *,
2169 cpp_token *token));
c5a04734
ZW
2170
2171typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
2172 cpp_token *));
2173
/* Macros on a cpp_name.  */

/* Reset NAME to an empty name whose text will start at the first
   unused byte of LIST's name buffer.  */
#define INIT_NAME(list, name) \
  do {(name).len = 0; (name).offset = (list)->name_used;} while (0)

/* Non-zero if the first token of LIST is a CPP_HASH.  LIST is
   parenthesized so that any pointer-valued expression can be passed.  */
#define IS_DIRECTIVE(list) ((list)->tokens[0].type == CPP_HASH)

/* Distance of CUR from buffer->line_base.  Relies on a variable named
   `buffer' being in scope at the point of use.  */
#define COLUMN(cur) ((cur) - buffer->line_base)
2180
/* Maybe put these in the ISTABLE eventually.  */
#define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
#define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')

/* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
   character, if any, is in buffer.

   C is the newline character just read and CUR points at the
   character after it.  '\r' + '\n' - (C) is the other member of the
   CR/LF pair; if CUR (checked against LIMIT first) points at it, it is
   consumed as part of the same line break.  CPP_BUMP_LINE_CUR is then
   invoked with the final CUR.  Relies on `pfile' being in scope.  */
#define handle_newline(cur, limit, c) \
  do {\
    if ((cur) < (limit) && *(cur) == '\r' + '\n' - (c)) \
      (cur)++; \
    CPP_BUMP_LINE_CUR (pfile, (cur)); \
  } while (0)
2193
/* Non-zero if the token currently being built does not have the
   PREV_WHITESPACE flag set.  Relies on a variable named cur_token
   being in scope at the point of use.  */
2194#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
/* Type of the token stored immediately before cur_token.  */
2195#define PREV_TOKEN_TYPE (cur_token[-1].type)
2196
/* Spelling classes stored in the type field of token_spellings.  The
   code that interprets each class is not visible in this part of the
   file.  */
2197#define SPELL_TEXT 0
2198#define SPELL_HANDLER 1
cfd5b8b8
NB
2199#define SPELL_CHAR 2
2200#define SPELL_NONE 3
2201#define SPELL_EOL 4
c5a04734
ZW
2202
/* Each helper expands to one initializer { class, S } followed by a
   comma; the first argument is ignored here.  They are defined only
   for the expansion of TTYPE_TABLE below and undefined straight
   after.  NOTE(review): TTYPE_TABLE is defined elsewhere; presumably
   it is a list of T/H/C/N/E invocations, one per token type.  */
2203#define T(e, s) {SPELL_TEXT, s},
2204#define H(e, s) {SPELL_HANDLER, s},
cfd5b8b8 2205#define C(e, s) {SPELL_CHAR, s},
c5a04734
ZW
2206#define N(e, s) {SPELL_NONE, s},
2207#define E(e, s) {SPELL_EOL, s},
2208
/* Table of N_TTYPES + 1 entries: whatever TTYPE_TABLE expands to,
   followed by an all-zero entry.  Each entry pairs a spelling class
   with an untyped pointer.  */
2209static const struct token_spelling
2210{
cfd5b8b8 2211 unsigned char type;
c5a04734
ZW
2212 PTR speller;
2213} token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
2214
2215#undef T
2216#undef H
cfd5b8b8 2217#undef C
c5a04734
ZW
2218#undef N
2219#undef E
2220
/* Source spellings of the digraphs.  NOTE(review): the code that
   indexes this table is not visible here; confirm that the order of
   the entries matches whatever index it uses.  */
2221static const unsigned char *digraph_spellings [] = {"%:", "%:%:", "<:",
2222 ":>", "<%", "%>"};
2223
2224static void
2225expand_comment_space (list)
2226 cpp_toklist *list;
2227{
2228 if (list->comments_cap == 0)
2229 {
2230 list->comments_cap = 10;
2231 list->comments = (cpp_token *)
2232 xmalloc (list->comments_cap * sizeof (cpp_token));
2233 }
2234 else
2235 {
2236 list->comments_cap *= 2;
2237 list->comments = (cpp_token *)
2238 xrealloc (list->comments, list->comments_cap);
2239 }
2240}
2241
2242void
2243cpp_free_token_list (list)
2244 cpp_toklist *list;
2245{
2246 if (list->comments)
2247 free (list->comments);
cfd5b8b8 2248 free (list->tokens - 1); /* Backup over dummy token. */
c5a04734
ZW
2249 free (list->namebuf);
2250 free (list);
2251}
2252
cfd5b8b8 2253static unsigned char trigraph_map[256];
c5a04734
ZW
2254
2255void
2256init_trigraph_map ()
2257{
2258 trigraph_map['='] = '#';
2259 trigraph_map['('] = '[';
2260 trigraph_map[')'] = ']';
2261 trigraph_map['/'] = '\\';
2262 trigraph_map['\''] = '^';
2263 trigraph_map['<'] = '{';
2264 trigraph_map['>'] = '}';
2265 trigraph_map['!'] = '|';
2266 trigraph_map['-'] = '~';
2267}
2268
2269/* Call when a trigraph is encountered. It warns if necessary, and
2270 returns true if the trigraph should be honoured. END is the third
2271 character of a trigraph in the input stream. */
2272static int
2273trigraph_ok (pfile, end)
2274 cpp_reader *pfile;
2275 const unsigned char *end;
2276{
2277 int accept = CPP_OPTION (pfile, trigraphs);
2278
2279 if (CPP_OPTION (pfile, warn_trigraphs))
2280 {
2281 unsigned int col = end - 1 - pfile->buffer->line_base;
2282 if (accept)
2283 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2284 "trigraph ??%c converted to %c",
2285 (int) *end, (int) trigraph_map[*end]);
2286 else
2287 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2288 "trigraph ??%c ignored", (int) *end);
2289 }
2290 return accept;
2291}
2292
2293/* Scan a string for trigraphs, warning or replacing them inline as
2294 appropriate. When parsing a string, we must call this routine
2295 before processing a newline character (if trigraphs are enabled),
2296 since the newline might be escaped by a preceding backslash
2297 trigraph sequence. Returns a pointer to the end of the name after
2298 replacement. */
2299
2300static unsigned char*
2301trigraph_replace (pfile, src, limit)
2302 cpp_reader *pfile;
2303 unsigned char *src;
2304 unsigned char* limit;
2305{
2306 unsigned char *dest;
2307
2308 /* Starting with src[1], find two consecutive '?'. The case of no
2309 trigraphs is streamlined. */
2310
2311 for (; src + 1 < limit; src += 2)
2312 {
2313 if (src[0] != '?')
2314 continue;
2315
2316 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
2317 if (src[-1] == '?')
2318 src--;
2319 else if (src + 2 == limit || src[1] != '?')
2320 continue;
2321
2322 /* Check if it really is a trigraph. */
2323 if (trigraph_map[src[2]] == 0)
2324 continue;
2325
2326 dest = src;
2327 goto trigraph_found;
2328 }
2329 return limit;
2330
2331 /* Now we have a trigraph, we need to scan the remaining buffer, and
2332 copy-shifting its contents left if replacement is enabled. */
2333 for (; src + 2 < limit; dest++, src++)
2334 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2335 {
2336 trigraph_found:
2337 src += 2;
2338 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2339 *dest = trigraph_map[*src];
2340 }
2341
2342 /* Copy remaining (at most 2) characters. */
2343 while (src < limit)
2344 *dest++ = *src++;
2345 return dest;
2346}
2347
2348/* If CUR is a backslash or the end of a trigraphed backslash, return
2349 a pointer to its beginning, otherwise NULL. We don't read beyond
2350 the buffer start, because there is the start of the comment in the
2351 buffer. */
2352static const unsigned char *
2353backslash_start (pfile, cur)
2354 cpp_reader *pfile;
2355 const unsigned char *cur;
2356{
2357 if (cur[0] == '\\')
2358 return cur;
2359 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2360 && trigraph_ok (pfile, cur))
2361 return cur - 2;
2362 return 0;
2363}
2364
2365 /* Skip a C-style block comment. This is probably the trickiest
2366 handler. We find the end of the comment by seeing if an asterisk
2367 is before every '/' we encounter. The nasty complication is that a
2368 previous asterisk may be separated by one or more escaped newlines.
2369 Returns non-zero if comment terminated by EOF, zero otherwise. */
/* Scanning starts at pfile->buffer->cur and never goes beyond
   buffer->rlimit.  On return buffer->cur has been advanced to just
   past the closing slash, or to rlimit if the buffer ran out first.  */
2370 static int
2371 skip_block_comment (pfile)
2372 cpp_reader *pfile;
2373 {
2374 cpp_buffer *buffer = pfile->buffer;
/* Non-null only straight after an escaped newline (or a run of them)
   that directly follows an asterisk: it then holds the position of the
   first character after that newline, so a slash found exactly there
   still closes the comment.  */
2375 const unsigned char *char_after_star = 0;
2376 register const unsigned char *cur = buffer->cur;
2377 int seen_eof = 0;
2378
2379 /* Inner loop would think the comment has ended if the first comment
2380 character is a '/'. Avoid this and keep the inner loop clean by
2381 skipping such a character. */
2382 if (cur < buffer->rlimit && cur[0] == '/')
2383 cur++;
2384
2385 for (; cur < buffer->rlimit; )
2386 {
2387 unsigned char c = *cur++;
2388
2389 /* People like decorating comments with '*', so check for
2390 '/' instead for efficiency. */
2391 if (c == '/')
2392 {
2393 if (cur[-2] == '*' || cur - 1 == char_after_star)
2394 goto out;
2395
2396 /* Warn about potential nested comments, but not when
2397 the final character inside the comment is a '/'.
2398 Don't bother to get it right across escaped newlines. */
2399 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2400 && cur[0] == '*' && cur[1] != '/')
2401 {
2402 buffer->cur = cur;
2403 cpp_warning (pfile, "'/*' within comment");
2404 }
2405 }
2406 else if (IS_NEWLINE(c))
2407 {
2408 const unsigned char* bslash = backslash_start (pfile, cur - 2);
2409
2410 handle_newline (cur, buffer->rlimit, c);
2411 /* Work correctly if there is an asterisk before an
2412 arbitrarily long sequence of escaped newlines. */
2413 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2414 char_after_star = cur;
2415 else
2416 char_after_star = 0;
2417 }
2418 }
/* Only reached when the loop ran off the end of the buffer without
   finding the terminator.  */
2419 seen_eof = 1;
2420
2421 out:
2422 buffer->cur = cur;
2423 return seen_eof;
2424 }
2425
2426/* Skip a C++ or Chill line comment. Handles escaped newlines.
2427 Returns non-zero if a multiline comment. */
2428static int
2429skip_line_comment (pfile)
2430 cpp_reader *pfile;
2431{
2432 cpp_buffer *buffer = pfile->buffer;
2433 register const unsigned char *cur = buffer->cur;
2434 int multiline = 0;
2435
2436 for (; cur < buffer->rlimit; )
2437 {
2438 unsigned char c = *cur++;
2439
2440 if (IS_NEWLINE (c))
2441 {
2442 /* Check for a (trigaph?) backslash escaping the newline. */
2443 if (!backslash_start (pfile, cur - 2))
2444 goto out;
2445 multiline = 1;
2446 handle_newline (cur, buffer->rlimit, c);
2447 }
2448 }
2449 cur++;
2450
2451 out:
2452 buffer->cur = cur - 1; /* Leave newline for caller. */
2453 return multiline;
2454}
2455
2456/* Skips whitespace, stopping at next non-whitespace character. */
2457static void
2458skip_whitespace (pfile, in_directive)
2459 cpp_reader *pfile;
2460 int in_directive;
2461{
2462 cpp_buffer *buffer = pfile->buffer;
2463 register const unsigned char *cur = buffer->cur;
2464 unsigned short null_count = 0;
2465
2466 for (; cur < buffer->rlimit; )
2467 {
2468 unsigned char c = *cur++;
2469
2470 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
2471 continue;
2472 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
2473 goto out;
2474 if (c == '\0')
2475 null_count++;
2476 /* Mut be '\f' or '\v' */
2477 else if (in_directive && CPP_PEDANTIC (pfile))
2478 cpp_pedwarn (pfile, "%s in preprocessing directive",
2479 c == '\f' ? "formfeed" : "vertical tab");
2480 }
2481 cur++;
2482
2483 out:
2484 buffer->cur = cur - 1;
2485 if (null_count)
2486 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2487 : "embedded null character ignored");
2488}
2489
2490/* Parse (append) an identifier. */
2491static void
2492parse_name (pfile, list, name)
2493 cpp_reader *pfile;
2494 cpp_toklist *list;
2495 cpp_name *name;
2496{
2497 const unsigned char *name_limit;
2498 unsigned char *namebuf;
2499 cpp_buffer *buffer = pfile->buffer;
2500 register const unsigned char *cur = buffer->cur;
2501
2502 expanded:
2503 name_limit = list->namebuf + list->name_cap;
2504 namebuf = list->namebuf + list->name_used;
2505
2506 for (; cur < buffer->rlimit && namebuf < name_limit; )
2507 {
2508 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2509
2510 if (! is_idchar(c))
2511 goto out;
2512 namebuf++;
2513 cur++;
2514 if (c == '$' && CPP_PEDANTIC (pfile))
2515 {
2516 buffer->cur = cur;
2517 cpp_pedwarn (pfile, "'$' character in identifier");
2518 }
2519 }
2520
2521 /* Run out of name space? */
2522 if (cur < buffer->rlimit)
2523 {
2524 list->name_used = namebuf - list->namebuf;
2525 auto_expand_name_space (list);
2526 goto expanded;
2527 }
2528
2529 out:
2530 buffer->cur = cur;
2531 name->len = namebuf - (list->namebuf + name->offset);
2532 list->name_used = namebuf - list->namebuf;
2533}
2534
2535/* Parse (append) a number. */
2536
2537#define VALID_SIGN(c, prevc) \
2538 (((c) == '+' || (c) == '-') && \
2539 ((prevc) == 'e' || (prevc) == 'E' \
2540 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2541
2542static void
2543parse_number (pfile, list, name)
2544 cpp_reader *pfile;
2545 cpp_toklist *list;
2546 cpp_name *name;
2547{
2548 const unsigned char *name_limit;
2549 unsigned char *namebuf;
2550 cpp_buffer *buffer = pfile->buffer;
2551 register const unsigned char *cur = buffer->cur;
2552
2553 expanded:
2554 name_limit = list->namebuf + list->name_cap;
2555 namebuf = list->namebuf + list->name_used;
2556
2557 for (; cur < buffer->rlimit && namebuf < name_limit; )
2558 {
2559 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2560
2561 /* Perhaps we should accept '$' here if we accept it for
2562 identifiers. We know namebuf[-1] is safe, because for c to
2563 be a sign we must have pushed at least one character. */
2564 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2565 goto out;
2566
2567 namebuf++;
2568 cur++;
2569 }
2570
2571 /* Run out of name space? */
2572 if (cur < buffer->rlimit)
2573 {
2574 list->name_used = namebuf - list->namebuf;
2575 auto_expand_name_space (list);
2576 goto expanded;
2577 }
2578
2579 out:
2580 buffer->cur = cur;
2581 name->len = namebuf - (list->namebuf + name->offset);
2582 list->name_used = namebuf - list->namebuf;
2583}
2584
2585 /* Places a string terminated by an unescaped TERMINATOR into a
2586 cpp_name, which should be expandable and thus at the top of the
2587 list's stack. Handles embedded trigraphs, if necessary, and
2588 escaped newlines.
2589
2590 Can be used for character constants (terminator = '\''), string
2591 constants ('"'), angled headers ('>') and assertions (')'). */
2592
/* PFILE supplies the input (pfile->buffer, read from buffer->cur),
   LIST owns the name buffer that receives the text, NAME is the
   cpp_name whose len is filled in on exit, and TERMINATOR is the
   closing character to look for.  The terminator itself is not stored
   in the name.  On exit buffer->cur points just past the terminator,
   or at the newline or buffer end where scanning gave up.  */
2593 static void
2594 parse_string (pfile, list, name, terminator)
2595 cpp_reader *pfile;
2596 cpp_toklist *list;
2597 cpp_name *name;
2598 unsigned int terminator;
2599 {
2600 cpp_buffer *buffer = pfile->buffer;
2601 register const unsigned char *cur = buffer->cur;
2602 const unsigned char *name_limit;
2603 unsigned char *namebuf;
2604 unsigned int null_count = 0;
2605 int trigraphed_len = 0;
2606
/* Re-entry point after the name buffer has been enlarged: both
   pointers are recomputed from LIST because the buffer may have
   moved.  */
2607 expanded:
2608 name_limit = list->namebuf + list->name_cap;
2609 namebuf = list->namebuf + list->name_used;
2610
2611 for (; cur < buffer->rlimit && namebuf < name_limit; )
2612 {
2613 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
2614
2615 if (c == '\0')
2616 null_count++;
2617 else if (c == terminator || IS_NEWLINE (c))
2618 {
2619 unsigned char* name_start = list->namebuf + name->offset;
2620
2621 /* Needed for trigraph_replace and multiline string warning. */
2622 buffer->cur = cur;
2623
2624 /* Scan for trigraphs before checking if backslash-escaped. */
2625 if (CPP_OPTION (pfile, trigraphs)
2626 || CPP_OPTION (pfile, warn_trigraphs))
2627 {
2628 namebuf = trigraph_replace (pfile, name_start + trigraphed_len,
2629 namebuf);
/* NOTE(review): the new value is measured from the previous scan
   start (name_start + trigraphed_len), not from name_start, although
   it is later used as an offset from name_start -- confirm that this
   is intended.  */
2630 trigraphed_len = namebuf - 2 - (name_start + trigraphed_len);
2631 if (trigraphed_len < 0)
2632 trigraphed_len = 0;
2633 }
2634
2635 namebuf--; /* Drop the newline / terminator from the name. */
2636 if (IS_NEWLINE (c))
2637 {
2638 /* Drop a backslash newline, and continue. */
/* NOTE(review): if the newline is the first character copied for this
   name, namebuf[-1] is the byte before the start of the name --
   confirm that reading it is harmless.  */
2639 if (namebuf[-1] == '\\')
2640 {
2641 handle_newline (cur, buffer->rlimit, c);
2642 namebuf--;
2643 continue;
2644 }
2645
2646 cur--;
2647
2648 /* In Fortran and assembly language, silently terminate
2649 strings of either variety at end of line. This is a
2650 kludge around not knowing where comments are in these
2651 languages. */
2652 if (CPP_OPTION (pfile, lang_fortran)
2653 || CPP_OPTION (pfile, lang_asm))
2654 goto out;
2655
2656 /* Character constants, headers and asserts may not
2657 extend over multiple lines. In Standard C, neither
2658 may strings. We accept multiline strings as an
2659 extension, but not in directives. */
2660 if (terminator != '"' || IS_DIRECTIVE (list))
2661 goto unterminated;
2662
2663 cur++; /* Move forwards again. */
2664
2665 if (pfile->multiline_string_line == 0)
2666 {
2667 pfile->multiline_string_line = list->line;
2668 if (CPP_PEDANTIC (pfile))
2669 cpp_pedwarn (pfile, "multi-line string constant");
2670 }
2671
2672 *namebuf++ = '\n';
2673 handle_newline (cur, buffer->rlimit, c);
2674 }
2675 else
2676 {
2677 unsigned char *temp;
2678
2679 /* An odd number of consecutive backslashes represents
2680 an escaped terminator. */
2681 temp = namebuf - 1;
2682 while (temp >= name_start && *temp == '\\')
2683 temp--;
2684
2685 if ((namebuf - temp) & 1)
2686 goto out;
2687 namebuf++;
2688 }
2689 }
2690 }
2691
2692 /* Run out of name space? */
2693 if (cur < buffer->rlimit)
2694 {
2695 list->name_used = namebuf - list->namebuf;
2696 auto_expand_name_space (list);
2697 goto expanded;
2698 }
2699
2700 /* We may not have trigraph-replaced the input for this code path,
2701 but as the input is in error by being unterminated we don't
2702 bother. Prevent warnings about no newlines at EOF. */
2703 if (IS_NEWLINE(cur[-1]))
2704 cur--;
2705
2706 unterminated:
2707 cpp_error (pfile, "missing terminating %c character", (int) terminator);
2708
2709 if (terminator == '\"' && pfile->multiline_string_line != list->line
2710 && pfile->multiline_string_line != 0)
2711 {
2712 cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2713 "possible start of unterminated string literal");
2714 pfile->multiline_string_line = 0;
2715 }
2716
2717 out:
2718 buffer->cur = cur;
2719 name->len = namebuf - (list->namebuf + name->offset);
2720 list->name_used = namebuf - list->namebuf;
2721
2722 if (null_count > 0)
2723 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2724 : "null character preserved"));
2725 }
2726
2727/* The character C helps us distinguish comment types: '*' = C style,
2728 '-' = Chill-style and '/' = C++ style. For code simplicity, the
2729 stored comment includes any C-style comment terminator. */
2730static void
2731copy_comment (list, from, len, tok_no, type)
2732 cpp_toklist *list;
2733 const unsigned char *from;
2734 unsigned int len;
2735 unsigned int tok_no;
2736 unsigned int type;
2737{
2738 cpp_token *comment;
2739
2740 if (list->comments_used == list->comments_cap)
2741 expand_comment_space (list);
2742
2743 if (list->name_used + len > list->name_cap)
2744 expand_name_space (list, len);
2745
2746 comment = &list->comments[list->comments_used++];
2747 comment->type = type;
2748 comment->aux = tok_no;
2749 comment->val.name.len = len;
2750 comment->val.name.offset = list->name_used;
2751
2752 memcpy (list->namebuf + list->name_used, from, len);
2753 list->name_used += len;
2754}
2755
2756/*
2757 * The tokenizer's main loop. Returns a token list, representing a
2758 * logical line in the input file, terminated with a CPP_VSPACE
2759 * token. On EOF, a token list containing the single CPP_EOF token
2760 * is returned.
2761 *
2762 * Implementation relies almost entirely on lookback, rather than
2763 * looking forwards. This means that tokenization requires just
2764 * a single pass of the file, even in the presence of trigraphs and
2765 * escaped newlines, providing significant performance benefits.
2766 * Trigraph overhead is negligible if they are disabled, and low
2767 * even when enabled.
2768 */
2769
/* Helpers for the token stack; all of them rely on a variable named
   cur_token being in scope.
   PUSH_TOKEN sets the type of the token under construction and moves
   on to the next slot.
   REVISE_TOKEN changes the type of the previous token, leaving
   cur_token where it is.
   BACKUP_TOKEN steps back to the previous token and changes its type;
   cur_token is left pointing at that token.
   BACKUP_DIGRAPH is BACKUP_TOKEN plus setting the DIGRAPH flag.  */
2770 #define PUSH_TOKEN(ttype) cur_token++->type = ttype
2771 #define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
2772 #define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
2773 #define BACKUP_DIGRAPH(ttype) do { \
2774 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
2775
/* Lex from pfile->buffer->cur, appending tokens to LIST->tokens
   starting at index LIST->tokens_used.

   Multi-character operators are not recognised by looking ahead.
   Each character is lexed on its own, and when it immediately follows
   (no PREV_WHITESPACE flag) a token it can combine with, that earlier
   token is revised or replaced.

   The same lookback starts comments: a slash or star (or, with the
   chill option, a minus) directly after the matching earlier token
   makes the function skip the comment, save it with copy_comment
   unless discard_comments is set, and drop the earlier token.

   A newline ends the line with a CPP_VSPACE token unless the previous
   token is a backslash immediately before it.  In that case the
   backslash token is removed and, if the token before it was an
   immediately preceding name or number, lexing of that name or number
   is resumed.

   If the token array fills up, it is enlarged with expand_token_space
   and lexing resumes.

   If the buffer runs out, type CPP_EOF is written into the next slot.
   If any tokens were lexed for this line, a missing-newline warning is
   given and that slot becomes the closing CPP_VSPACE instead.

   On exit buffer->cur and LIST->tokens_used are updated, and a line
   whose first token is a hash but which has no directive handler is
   reported as an invalid directive (except in assembly source).  */
2776 void
2777 _cpp_lex_line (pfile, list)
2778 cpp_reader *pfile;
2779 cpp_toklist *list;
2780 {
2781 cpp_token *cur_token, *token_limit;
2782 cpp_buffer *buffer = pfile->buffer;
2783 register const unsigned char *cur = buffer->cur;
/* Flags to give the next token; set to PREV_WHITESPACE after
   whitespace or a comment, cleared once a token has taken them.  */
2784 unsigned char flags = 0;
2785
/* Re-entry point after the token array has been enlarged.  */
2786 expanded:
2787 token_limit = list->tokens + list->tokens_cap;
2788 cur_token = list->tokens + list->tokens_used;
2789
2790 for (; cur < buffer->rlimit && cur_token < token_limit;)
2791 {
2792 unsigned char c = *cur++;
2793
2794 /* Optimize whitespace skipping, in particular the case of a
2795 single whitespace character, as every other token is probably
2796 whitespace. (' ' '\t' '\v' '\f' '\0'). */
2797 if (is_hspace ((unsigned int) c))
2798 {
2799 if (c == '\0' || (cur < buffer->rlimit && is_hspace (*cur)))
2800 {
2801 buffer->cur = cur - (c == '\0'); /* Get the null warning. */
2802 skip_whitespace (pfile, IS_DIRECTIVE (list));
2803 cur = buffer->cur;
2804 }
2805 flags = PREV_WHITESPACE;
2806 if (cur == buffer->rlimit)
2807 break;
2808 c = *cur++;
2809 }
2810
2811 /* Initialize current token. Its type is set in the switch. */
2812 cur_token->col = COLUMN (cur);
2813 cur_token->flags = flags;
2814 flags = 0;
2815
2816 switch (c)
2817 {
2818 case '0': case '1': case '2': case '3': case '4':
2819 case '5': case '6': case '7': case '8': case '9':
2820 /* Prepend an immediately previous CPP_DOT token. */
2821 if (PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ())
2822 {
2823 cur_token--;
2824 if (list->name_cap == list->name_used)
2825 auto_expand_name_space (list);
2826
2827 cur_token->val.name.len = 1;
2828 cur_token->val.name.offset = list->name_used;
2829 list->namebuf[list->name_used++] = '.';
2830 }
2831 else
2832 INIT_NAME (list, cur_token->val.name);
2833 cur--; /* Backup character. */
2834
/* Also entered from the newline case to resume a number that was
   interrupted by an escaped newline.  */
2835 continue_number:
2836 buffer->cur = cur;
2837 parse_number (pfile, list, &cur_token->val.name);
2838 cur = buffer->cur;
2839
2840 PUSH_TOKEN (CPP_NUMBER); /* Number not yet interpreted. */
2841 break;
2842
2843 letter:
2844 case '_':
2845 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2846 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2847 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2848 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2849 case 'y': case 'z':
2850 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2851 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2852 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2853 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2854 case 'Y': case 'Z':
2855 INIT_NAME (list, cur_token->val.name);
2856 cur--; /* Backup character. */
2857 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
2858
/* Also entered from the newline case to resume a name that was
   interrupted by an escaped newline.  */
2859 continue_name:
2860 buffer->cur = cur;
2861 parse_name (pfile, list, &cur_token->val.name);
2862 cur = buffer->cur;
2863
2864 /* Find handler for newly created / extended directive. */
2865 if (IS_DIRECTIVE (list) && cur_token == &list->tokens[1])
2866 _cpp_check_directive (list, cur_token);
2867 cur_token++;
2868 break;
2869
2870 case '\'':
2871 /* Fall through. */
2872 case '\"':
2873 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2874 /* Do we have a wide string? */
2875 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2876 && cur_token[-1].val.name.len == 1
2877 && TOK_NAME (list, cur_token - 1)[0] == 'L'
2878 && !CPP_TRADITIONAL (pfile))
2879 {
2880 /* No need for 'L' any more. */
2881 list->name_used--;
2882 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
2883 }
2884
2885 do_parse_string:
2886 /* Here c is one of ' " > or ). */
2887 INIT_NAME (list, cur_token->val.name);
2888 buffer->cur = cur;
2889 parse_string (pfile, list, &cur_token->val.name, c);
2890 cur = buffer->cur;
2891 cur_token++;
2892 break;
2893
2894 case '/':
2895 cur_token->type = CPP_DIV;
2896 if (IMMED_TOKEN ())
2897 {
2898 if (PREV_TOKEN_TYPE == CPP_DIV)
2899 {
2900 /* We silently allow C++ comments in system headers,
2901 irrespective of conformance mode, because lots of
2902 broken systems do that and trying to clean it up
2903 in fixincludes is a nightmare. */
2904 if (buffer->system_header_p)
2905 goto do_line_comment;
2906 else if (CPP_OPTION (pfile, cplusplus_comments))
2907 {
2908 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
2909 && ! buffer->warned_cplusplus_comments)
2910 {
2911 buffer->cur = cur;
2912 cpp_pedwarn (pfile,
2913 "C++ style comments are not allowed in ISO C89");
2914 cpp_pedwarn (pfile,
2915 "(this will be reported only once per input file)");
2916 buffer->warned_cplusplus_comments = 1;
2917 }
2918 do_line_comment:
2919 buffer->cur = cur;
2920 if (cur[-2] != c)
2921 cpp_warning (pfile,
2922 "comment start split across lines");
2923 if (skip_line_comment (pfile))
2924 cpp_error_with_line (pfile, list->line,
2925 cur_token[-1].col,
2926 "multi-line comment");
2927 if (!CPP_OPTION (pfile, discard_comments))
2928 copy_comment (list, cur, buffer->cur - cur,
2929 cur_token - 1 - list->tokens, c == '/'
2930 ? CPP_CPP_COMMENT: CPP_CHILL_COMMENT);
2931 cur = buffer->cur;
2932
2933 /* Back-up to first '-' or '/'. */
2934 cur_token -= 2;
2935 if (!CPP_OPTION (pfile, traditional))
2936 flags = PREV_WHITESPACE;
2937 }
2938 }
2939 }
2940 cur_token++;
2941 break;
2942
2943 case '*':
2944 cur_token->type = CPP_MULT;
2945 if (IMMED_TOKEN ())
2946 {
2947 if (PREV_TOKEN_TYPE == CPP_DIV)
2948 {
2949 buffer->cur = cur;
2950 if (cur[-2] != '/')
2951 cpp_warning (pfile,
2952 "comment start '/*' split across lines");
2953 if (skip_block_comment (pfile))
2954 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
2955 "unterminated comment");
2956 else if (buffer->cur[-2] != '*')
2957 cpp_warning (pfile,
2958 "comment end '*/' split across lines");
2959 if (!CPP_OPTION (pfile, discard_comments))
2960 copy_comment (list, cur, buffer->cur - cur,
2961 cur_token - 1 - list->tokens, CPP_C_COMMENT);
2962 cur = buffer->cur;
2963
2964 cur_token -= 2;
2965 if (!CPP_OPTION (pfile, traditional))
2966 flags = PREV_WHITESPACE;
2967 }
2968 else if (CPP_OPTION (pfile, cplusplus))
2969 {
2970 /* In C++, there are .* and ->* operators. */
2971 if (PREV_TOKEN_TYPE == CPP_DEREF)
2972 BACKUP_TOKEN (CPP_DEREF_STAR);
2973 else if (PREV_TOKEN_TYPE == CPP_DOT)
2974 BACKUP_TOKEN (CPP_DOT_STAR);
2975 }
2976 }
2977 cur_token++;
2978 break;
2979
2980 case '\n':
2981 case '\r':
2982 handle_newline (cur, buffer->rlimit, c);
2983 if (PREV_TOKEN_TYPE != CPP_BACKSLASH || !IMMED_TOKEN ())
2984 {
2985 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
2986 {
2987 buffer->cur = cur;
2988 cpp_warning (pfile,
2989 "backslash and newline separated by space");
2990 }
2991 PUSH_TOKEN (CPP_VSPACE);
2992 goto out;
2993 }
2994 /* Remove the escaped newline. Then continue to process
2995 any interrupted name or number. */
2996 cur_token--;
2997 if (IMMED_TOKEN ())
2998 {
2999 cur_token--;
3000 if (cur_token->type == CPP_NAME)
3001 goto continue_name;
3002 else if (cur_token->type == CPP_NUMBER)
3003 goto continue_number;
3004 cur_token++;
3005 }
3006 break;
3007
3008 case '-':
3009 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3010 {
3011 if (CPP_OPTION (pfile, chill))
3012 goto do_line_comment;
3013 REVISE_TOKEN (CPP_MINUS_MINUS);
3014 }
3015 else
3016 PUSH_TOKEN (CPP_MINUS);
3017 break;
3018
3019 /* The digraph flag checking ensures that ## and %:%:
3020 are interpreted as CPP_PASTE, but #%: and %:# are not. */
3021 make_hash:
3022 case '#':
3023 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3024 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3025 REVISE_TOKEN (CPP_PASTE);
3026 else
3027 PUSH_TOKEN (CPP_HASH);
3028 break;
3029
3030 case ':':
3031 cur_token->type = CPP_COLON;
3032 if (IMMED_TOKEN ())
3033 {
3034 if (PREV_TOKEN_TYPE == CPP_COLON
3035 && CPP_OPTION (pfile, cplusplus))
3036 BACKUP_TOKEN (CPP_SCOPE);
3037 /* Digraph: "<:" is a '[' */
3038 else if (PREV_TOKEN_TYPE == CPP_LESS)
3039 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3040 /* Digraph: "%:" is a '#' */
3041 else if (PREV_TOKEN_TYPE == CPP_MOD)
3042 {
3043 (--cur_token)->flags |= DIGRAPH;
3044 goto make_hash;
3045 }
3046 }
3047 cur_token++;
3048 break;
3049
3050 case '&':
3051 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3052 REVISE_TOKEN (CPP_AND_AND);
3053 else
3054 PUSH_TOKEN (CPP_AND);
3055 break;
3056
3057 make_or:
3058 case '|':
3059 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3060 REVISE_TOKEN (CPP_OR_OR);
3061 else
3062 PUSH_TOKEN (CPP_OR);
3063 break;
3064
3065 case '+':
3066 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3067 REVISE_TOKEN (CPP_PLUS_PLUS);
3068 else
3069 PUSH_TOKEN (CPP_PLUS);
3070 break;
3071
3072 case '=':
3073 /* This relies on equidistance of "?=" and "?" tokens. */
3074 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3075 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3076 else
3077 PUSH_TOKEN (CPP_EQ);
3078 break;
3079
3080 case '>':
3081 cur_token->type = CPP_GREATER;
3082 if (IMMED_TOKEN ())
3083 {
3084 if (PREV_TOKEN_TYPE == CPP_GREATER)
3085 BACKUP_TOKEN (CPP_RSHIFT);
3086 else if (PREV_TOKEN_TYPE == CPP_MINUS)
3087 BACKUP_TOKEN (CPP_DEREF);
3088 /* Digraph: ":>" is a ']' */
3089 else if (PREV_TOKEN_TYPE == CPP_COLON)
3090 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3091 /* Digraph: "%>" is a '}' */
3092 else if (PREV_TOKEN_TYPE == CPP_MOD)
3093 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
3094 }
3095 cur_token++;
3096 break;
3097
3098 case '<':
3099 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3100 {
3101 REVISE_TOKEN (CPP_LSHIFT);
3102 break;
3103 }
3104 /* Is this the beginning of a header name? */
3105 if (list->dir_flags & SYNTAX_INCLUDE)
3106 {
3107 c = '>'; /* Terminator. */
3108 cur_token->type = CPP_HEADER_NAME;
3109 goto do_parse_string;
3110 }
3111 PUSH_TOKEN (CPP_LESS);
3112 break;
3113
3114 case '%':
3115 /* Digraph: "<%" is a '{' */
3116 cur_token->type = CPP_MOD;
3117 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3118 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
3119 cur_token++;
3120 break;
3121
c5a04734
ZW
3122 case '(':
3123 /* Is this the beginning of an assertion string? */
3124 if (list->dir_flags & SYNTAX_ASSERT)
3125 {
3126 c = ')'; /* Terminator. */
3127 cur_token->type = CPP_ASSERTION;
3128 goto do_parse_string;
3129 }
3130 PUSH_TOKEN (CPP_OPEN_PAREN);
3131 break;
3132
c5a04734
ZW
3133 case '?':
3134 if (cur + 1 < buffer->rlimit && *cur == '?'
3135 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3136 {
3137 /* Handle trigraph. */
3138 cur++;
3139 switch (*cur++)
3140 {
3141 case '(': goto make_open_square;
3142 case ')': goto make_close_square;
3143 case '<': goto make_open_brace;
3144 case '>': goto make_close_brace;
3145 case '=': goto make_hash;
3146 case '!': goto make_or;
3147 case '-': goto make_complement;
3148 case '/': goto make_backslash;
3149 case '\'': goto make_xor;
3150 }
3151 }
3152 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3153 {
3154 /* GNU C++ defines <? and >? operators. */
3155 if (PREV_TOKEN_TYPE == CPP_LESS)
3156 {
3157 REVISE_TOKEN (CPP_MIN);
3158 break;
3159 }
3160 else if (PREV_TOKEN_TYPE == CPP_GREATER)
3161 {
3162 REVISE_TOKEN (CPP_MAX);
3163 break;
3164 }
3165 }
3166 PUSH_TOKEN (CPP_QUERY);
3167 break;
3168
3169 case '.':
3170 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3171 && IMMED_TOKEN ()
3172 && !(cur_token[-1].flags & PREV_WHITESPACE))
3173 {
3174 cur_token -= 2;
3175 PUSH_TOKEN (CPP_ELLIPSIS);
3176 }
3177 else
3178 PUSH_TOKEN (CPP_DOT);
3179 break;
3180
cfd5b8b8
NB
3181 make_complement:
3182 case '~': PUSH_TOKEN (CPP_COMPL); break;
c5a04734
ZW
3183 make_xor:
3184 case '^': PUSH_TOKEN (CPP_XOR); break;
3185 make_open_brace:
3186 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3187 make_close_brace:
3188 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3189 make_open_square:
3190 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3191 make_close_square:
3192 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3193 make_backslash:
3194 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3195 case '!': PUSH_TOKEN (CPP_NOT); break;
3196 case ',': PUSH_TOKEN (CPP_COMMA); break;
3197 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
cfd5b8b8 3198 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
c5a04734
ZW
3199
3200 case '$':
3201 if (CPP_OPTION (pfile, dollars_in_ident))
3202 goto letter;
3203 /* Fall through */
3204 default:
3205 cur_token->aux = c;
3206 PUSH_TOKEN (CPP_OTHER);
3207 break;
3208 }
3209 }
3210
3211 /* Run out of token space? */
3212 if (cur_token == token_limit)
3213 {
3214 list->tokens_used = cur_token - list->tokens;
3215 expand_token_space (list);
3216 goto expanded;
3217 }
3218
3219 cur_token->type = CPP_EOF;
3220 cur_token->flags = flags;
3221
3222 if (cur_token != &list->tokens[0])
3223 {
3224 /* Next call back will get just a CPP_EOF. */
3225 buffer->cur = cur;
3226 cpp_warning (pfile, "no newline at end of file");
3227 PUSH_TOKEN (CPP_VSPACE);
3228 }
3229
3230 out:
3231 buffer->cur = cur;
3232
3233 list->tokens_used = cur_token - list->tokens;
3234
3235 /* FIXME: take this check out and put it in the caller.
3236 list->directive == 0 indicates an unknown directive (but null
3237 directive is OK). This is the first time we can be sure the
3238 directive is invalid, and thus warn about it, because it might
3239 have been split by escaped newlines. Also, don't complain about
3240 invalid directives in assembly source, we don't know where the
3241 comments are, and # may introduce assembler pseudo-ops. */
3242
3243 if (IS_DIRECTIVE (list) && list->dir_handler == 0
3244 && list->tokens[1].type != CPP_VSPACE
3245 && !CPP_OPTION (pfile, lang_asm))
3246 cpp_error_with_line (pfile, list->line, list->tokens[1].col,
3247 "invalid preprocessing directive");
3248 }
3249
3250/* Token spelling functions. Used for output of a preprocessed file,
3251 stringizing and token pasting. They all assume sufficient buffer
3252 is allocated, and return exactly how much they used. */
3253
c5a04734
ZW
3254/* Needs buffer of 3 + len. */
3255unsigned int
3256spell_string (buffer, list, token)
3257 unsigned char *buffer;
3258 cpp_toklist *list;
3259 cpp_token *token;
3260{
cfd5b8b8 3261 unsigned char c, *orig_buff = buffer;
c5a04734
ZW
3262 size_t len;
3263
cfd5b8b8 3264 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
c5a04734 3265 *buffer++ = 'L';
cfd5b8b8
NB
3266 c = token->type == CPP_STRING || token->type == CPP_WSTRING ? '"': '\'';
3267 *buffer++ = c;
c5a04734
ZW
3268
3269 len = token->val.name.len;
3270 memcpy (buffer, TOK_NAME (list, token), len);
3271 buffer += len;
cfd5b8b8 3272 *buffer++ = c;
c5a04734
ZW
3273 return buffer - orig_buff;
3274}
3275
3276/* Needs buffer of len + 2. */
3277unsigned int
3278spell_comment (buffer, list, token)
3279 unsigned char *buffer;
3280 cpp_toklist *list;
3281 cpp_token *token;
3282{
3283 size_t len;
3284
3285 if (token->type == CPP_C_COMMENT)
3286 {
3287 *buffer++ = '/';
3288 *buffer++ = '*';
3289 }
3290 else if (token->type == CPP_CPP_COMMENT)
3291 {
3292 *buffer++ = '/';
3293 *buffer++ = '/';
3294 }
3295 else
3296 {
3297 *buffer++ = '-';
3298 *buffer++ = '-';
3299 }
3300
3301 len = token->val.name.len;
3302 memcpy (buffer, TOK_NAME (list, token), len);
3303
3304 return len + 2;
3305}
3306
3307/* Needs buffer of len. */
3308unsigned int
3309spell_name (buffer, list, token)
3310 unsigned char *buffer;
3311 cpp_toklist *list;
3312 cpp_token *token;
3313{
3314 size_t len;
3315
3316 len = token->val.name.len;
3317 memcpy (buffer, TOK_NAME (list, token), len);
3318 buffer += len;
3319
3320 return len;
3321}
3322
c5a04734
ZW
3323void
3324_cpp_lex_file (pfile)
3325 cpp_reader* pfile;
3326{
3327 int recycle;
3328 cpp_toklist* list;
3329
3330 init_trigraph_map ();
3331 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3332
3333 for (recycle = 0; ;)
3334 {
3335 init_token_list (pfile, list, recycle);
3336 recycle = 1;
3337
3338 _cpp_lex_line (pfile, list);
3339 if (list->tokens[0].type == CPP_EOF)
3340 break;
3341
3342 if (list->dir_handler)
3343 {
3344 if (list->dir_handler (pfile))
3345 {
3346 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3347 recycle = 0;
3348 }
3349 }
3350 else
3351 _cpp_output_list (pfile, list);
3352 }
3353}
3354
cfd5b8b8
NB
/* This could be useful to other routines.  If you allocate this many
   bytes, you have enough room to spell the token: 4 bytes, plus the
   length of the token's text when it is spelled by a handler function
   (the handlers in this file need at most 3 + len bytes).
   TOKEN is a pointer to a cpp_token; it is evaluated more than once,
   so it must not have side effects.  */
#define TOKEN_LEN(token) \
  (4 + (token_spellings[(token)->type].type == SPELL_HANDLER \
	? (token)->val.name.len : 0))
3359
c5a04734
ZW
3360static void
3361_cpp_output_list (pfile, list)
3362 cpp_reader *pfile;
3363 cpp_toklist *list;
3364{
3365 unsigned int comment_no = 0;
3366 cpp_token *token, *comment_token = 0;
3367
3368 if (list->comments_used > 0)
3369 comment_token = list->tokens + list->comments[0].aux;
3370
3371 CPP_RESERVE (pfile, 2); /* Always have room for " \n". */
3372 for (token = &list->tokens[0];; token++)
3373 {
3374 if (token->flags & PREV_WHITESPACE)
3375 {
3376 /* Output comments if -C. Otherwise a space will do. */
3377 if (token == comment_token)
3378 {
3379 cpp_token *comment = &list->comments[comment_no];
3380 do
3381 {
cfd5b8b8 3382 CPP_RESERVE (pfile, 2 + TOKEN_LEN (comment));
c5a04734
ZW
3383 pfile->limit += spell_comment (pfile->limit, list, comment);
3384 comment_no++, comment++;
3385 if (comment_no == list->comments_used)
3386 break;
3387 comment_token = comment->aux + list->tokens;
3388 }
3389 while (comment_token == token);
3390 }
3391 else
3392 CPP_PUTC_Q (pfile, ' ');
3393 }
3394
cfd5b8b8 3395 CPP_RESERVE (pfile, 2 + TOKEN_LEN (token));
c5a04734
ZW
3396 switch (token_spellings[token->type].type)
3397 {
3398 case SPELL_TEXT:
3399 {
3400 const unsigned char *spelling;
3401 unsigned char c;
3402
c5a04734 3403 if (token->flags & DIGRAPH)
cfd5b8b8 3404 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
c5a04734
ZW
3405 else
3406 spelling = token_spellings[token->type].speller;
3407
3408 while ((c = *spelling++) != '\0')
3409 CPP_PUTC_Q (pfile, c);
3410 }
3411 break;
3412
3413 case SPELL_HANDLER:
3414 {
3415 speller s;
3416
3417 s = (speller) token_spellings[token->type].speller;
c5a04734
ZW
3418 pfile->limit += s (pfile->limit, list, token);
3419 }
3420 break;
3421
cfd5b8b8
NB
3422 case SPELL_CHAR:
3423 *pfile->limit++ = token->aux;
3424 break;
3425
c5a04734
ZW
3426 case SPELL_EOL:
3427 CPP_PUTC_Q (pfile, '\n');
3428 return;
3429
3430 case SPELL_NONE:
3431 cpp_error (pfile, "Unwriteable token");
3432 break;
3433 }
3434 }
3435}
3436
3437#endif
This page took 3.399648 seconds and 5 git commands to generate.