]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
Daily bump.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7
8This program is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
10Free Software Foundation; either version 2, or (at your option) any
11later version.
12
13This program is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with this program; if not, write to the Free Software
20Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
21
22#include "config.h"
23#include "system.h"
24#include "intl.h"
25#include "cpplib.h"
26#include "cpphash.h"
27
ff2b53ef
ZW
/* Return the character N positions ahead of BUFFER's read pointer
   without consuming it, or EOF if that position is at or beyond rlimit. */
28#define PEEKBUF(BUFFER, N) \
29 ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
/* Consume and return the next character of BUFFER.  At rlimit this
   yields EOF and leaves the read pointer where it is. */
30#define GETBUF(BUFFER) \
31 ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
/* Move BUFFER's read pointer by N (which may be negative).  No bounds
   check is performed. */
32#define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))
33
/* Shorthands operating on the current buffer of a variable named
   `pfile', which must be in scope at the point of use. */
34#define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
35#define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
36#define GETC() GETBUF (CPP_BUFFER (pfile))
37#define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
45b966db
ZW
38
/* Forward declarations for the helpers that are private to this file. */
39static void skip_block_comment PARAMS ((cpp_reader *));
40static void skip_line_comment PARAMS ((cpp_reader *));
41static int maybe_macroexpand PARAMS ((cpp_reader *, long));
42static int skip_comment PARAMS ((cpp_reader *, int));
43static int copy_comment PARAMS ((cpp_reader *, int));
44static void skip_string PARAMS ((cpp_reader *, int));
45static void parse_string PARAMS ((cpp_reader *, int));
46static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
47static int null_cleanup PARAMS ((cpp_buffer *, cpp_reader *));
64aaf407 48static void null_warning PARAMS ((cpp_reader *, unsigned int));
45b966db
ZW
49
50/* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
51
52void
53_cpp_grow_token_buffer (pfile, n)
54 cpp_reader *pfile;
55 long n;
56{
57 long old_written = CPP_WRITTEN (pfile);
58 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
59 pfile->token_buffer = (U_CHAR *)
60 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
61 CPP_SET_WRITTEN (pfile, old_written);
62}
63
64static int
65null_cleanup (pbuf, pfile)
66 cpp_buffer *pbuf ATTRIBUTE_UNUSED;
67 cpp_reader *pfile ATTRIBUTE_UNUSED;
68{
69 return 0;
70}
71
72/* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
73 If BUFFER != NULL, then use the LENGTH characters in BUFFER
74 as the new input buffer.
75 Return the new buffer, or NULL on failure. */
76
77cpp_buffer *
78cpp_push_buffer (pfile, buffer, length)
79 cpp_reader *pfile;
80 const U_CHAR *buffer;
81 long length;
82{
83 cpp_buffer *buf = CPP_BUFFER (pfile);
84 cpp_buffer *new;
85 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
86 {
87 cpp_fatal (pfile, "macro or `#include' recursion too deep");
88 return NULL;
89 }
90
91 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
92
93 new->if_stack = pfile->if_stack;
94 new->cleanup = null_cleanup;
95 new->buf = new->cur = buffer;
ff2b53ef 96 new->rlimit = buffer + length;
45b966db 97 new->prev = buf;
ff2b53ef 98 new->mark = NULL;
45b966db
ZW
99 new->line_base = NULL;
100
101 CPP_BUFFER (pfile) = new;
102 return new;
103}
104
105cpp_buffer *
106cpp_pop_buffer (pfile)
107 cpp_reader *pfile;
108{
109 cpp_buffer *buf = CPP_BUFFER (pfile);
110 if (ACTIVE_MARK_P (pfile))
111 cpp_ice (pfile, "mark active in cpp_pop_buffer");
112 (*buf->cleanup) (buf, pfile);
113 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
114 free (buf);
115 pfile->buffer_stack_depth--;
116 return CPP_BUFFER (pfile);
117}
118
119/* Scan until CPP_BUFFER (PFILE) is exhausted into PFILE->token_buffer.
120 Pop the buffer when done. */
121
122void
123cpp_scan_buffer (pfile)
124 cpp_reader *pfile;
125{
126 cpp_buffer *buffer = CPP_BUFFER (pfile);
3a2b2c7a 127 enum cpp_ttype token;
ae79697b 128 if (CPP_OPTION (pfile, no_output))
45b966db
ZW
129 {
130 long old_written = CPP_WRITTEN (pfile);
131 /* In no-output mode, we can ignore everything but directives. */
132 for (;;)
133 {
134 if (! pfile->only_seen_white)
135 _cpp_skip_rest_of_line (pfile);
136 token = cpp_get_token (pfile);
137 if (token == CPP_EOF) /* Should not happen ... */
138 break;
139 if (token == CPP_POP && CPP_BUFFER (pfile) == buffer)
140 {
141 if (CPP_PREV_BUFFER (CPP_BUFFER (pfile)) != NULL)
142 cpp_pop_buffer (pfile);
143 break;
144 }
145 }
146 CPP_SET_WRITTEN (pfile, old_written);
147 }
148 else
149 {
150 for (;;)
151 {
152 token = cpp_get_token (pfile);
153 if (token == CPP_EOF) /* Should not happen ... */
154 break;
155 if (token == CPP_POP && CPP_BUFFER (pfile) == buffer)
156 {
157 if (CPP_PREV_BUFFER (CPP_BUFFER (pfile)) != NULL)
158 cpp_pop_buffer (pfile);
159 break;
160 }
161 }
162 }
163}
164
165/*
166 * Rescan a string (which may have escape marks) into pfile's buffer.
167 * Place the result in pfile->token_buffer.
168 *
169 * The input is copied before it is scanned, so it is safe to pass
170 * it something from the token_buffer that will get overwritten
171 * (because it follows CPP_WRITTEN). This is used by do_include.
172 */
173
174void
175cpp_expand_to_buffer (pfile, buf, length)
176 cpp_reader *pfile;
177 const U_CHAR *buf;
178 int length;
179{
180 register cpp_buffer *ip;
181 U_CHAR *buf1;
182 int save_no_output;
183
184 if (length < 0)
185 {
186 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
187 return;
188 }
189
190 /* Set up the input on the input stack. */
191
192 buf1 = (U_CHAR *) alloca (length + 1);
193 memcpy (buf1, buf, length);
194 buf1[length] = 0;
195
196 ip = cpp_push_buffer (pfile, buf1, length);
197 if (ip == NULL)
198 return;
199 ip->has_escapes = 1;
200
201 /* Scan the input, create the output. */
ae79697b
ZW
202 save_no_output = CPP_OPTION (pfile, no_output);
203 CPP_OPTION (pfile, no_output) = 0;
204 CPP_OPTION (pfile, no_line_commands)++;
45b966db 205 cpp_scan_buffer (pfile);
ae79697b
ZW
206 CPP_OPTION (pfile, no_line_commands)--;
207 CPP_OPTION (pfile, no_output) = save_no_output;
45b966db
ZW
208
209 CPP_NUL_TERMINATE (pfile);
210}
211
45b966db
ZW
212/* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
213
214cpp_buffer *
215cpp_file_buffer (pfile)
216 cpp_reader *pfile;
217{
218 cpp_buffer *ip;
219
220 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
221 if (ip->ihash != NULL)
222 return ip;
223 return NULL;
224}
225
226/* Skip a C-style block comment. We know it's a comment, and point is
227 at the second character of the starter. */
228static void
229skip_block_comment (pfile)
230 cpp_reader *pfile;
231{
3a2b2c7a 232 unsigned int line, col;
61474454 233 const U_CHAR *limit, *cur;
45b966db
ZW
234
235 FORWARD(1);
3a2b2c7a
ZW
236 line = CPP_BUF_LINE (CPP_BUFFER (pfile));
237 col = CPP_BUF_COL (CPP_BUFFER (pfile));
61474454
NB
238 limit = CPP_BUFFER (pfile)->rlimit;
239 cur = CPP_BUFFER (pfile)->cur;
240
241 while (cur < limit)
45b966db 242 {
61474454
NB
243 char c = *cur++;
244 if (c == '\n' || c == '\r')
45b966db
ZW
245 {
246 /* \r cannot be a macro escape marker here. */
247 if (!ACTIVE_MARK_P (pfile))
61474454
NB
248 CPP_BUMP_LINE_CUR (pfile, cur);
249 }
250 else if (c == '*')
251 {
252 /* Check for teminator. */
253 if (cur < limit && *cur == '/')
254 goto out;
255
256 /* Warn about comment starter embedded in comment. */
257 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
258 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
259 cur - CPP_BUFFER (pfile)->line_base,
260 "'/*' within comment");
45b966db 261 }
45b966db 262 }
61474454
NB
263
264 cpp_error_with_line (pfile, line, col, "unterminated comment");
265 cur--;
266 out:
267 CPP_BUFFER (pfile)->cur = cur + 1;
45b966db
ZW
268}
269
270/* Skip a C++/Chill line comment. We know it's a comment, and point
271 is at the second character of the initiator. */
272static void
273skip_line_comment (pfile)
274 cpp_reader *pfile;
275{
276 FORWARD(1);
277 for (;;)
278 {
279 int c = GETC ();
280
281 /* We don't have to worry about EOF in here. */
282 if (c == '\n')
283 {
284 /* Don't consider final '\n' to be part of comment. */
285 FORWARD(-1);
286 return;
287 }
288 else if (c == '\r')
289 {
290 /* \r cannot be a macro escape marker here. */
291 if (!ACTIVE_MARK_P (pfile))
292 CPP_BUMP_LINE (pfile);
ae79697b 293 if (CPP_OPTION (pfile, warn_comments))
45b966db
ZW
294 cpp_warning (pfile, "backslash-newline within line comment");
295 }
296 }
297}
298
299/* Skip a comment - C, C++, or Chill style. M is the first character
300 of the comment marker. If this really is a comment, skip to its
301 end and return ' '. If this is not a comment, return M (which will
302 be '/' or '-'). */
303
304static int
305skip_comment (pfile, m)
306 cpp_reader *pfile;
307 int m;
308{
309 if (m == '/' && PEEKC() == '*')
310 {
311 skip_block_comment (pfile);
312 return ' ';
313 }
314 else if (m == '/' && PEEKC() == '/')
315 {
316 if (CPP_BUFFER (pfile)->system_header_p)
317 {
318 /* We silently allow C++ comments in system headers, irrespective
319 of conformance mode, because lots of busted systems do that
320 and trying to clean it up in fixincludes is a nightmare. */
321 skip_line_comment (pfile);
322 return ' ';
323 }
ae79697b 324 else if (CPP_OPTION (pfile, cplusplus_comments))
45b966db 325 {
ae79697b 326 if (CPP_OPTION (pfile, c89)
45b966db
ZW
327 && CPP_PEDANTIC (pfile)
328 && ! CPP_BUFFER (pfile)->warned_cplusplus_comments)
329 {
330 cpp_pedwarn (pfile,
331 "C++ style comments are not allowed in ISO C89");
332 cpp_pedwarn (pfile,
333 "(this will be reported only once per input file)");
334 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
335 }
336 skip_line_comment (pfile);
337 return ' ';
338 }
339 else
340 return m;
341 }
342 else if (m == '-' && PEEKC() == '-'
ae79697b 343 && CPP_OPTION (pfile, chill))
45b966db
ZW
344 {
345 skip_line_comment (pfile);
346 return ' ';
347 }
348 else
349 return m;
350}
351
352/* Identical to skip_comment except that it copies the comment into the
353 token_buffer. This is used if !discard_comments. */
354static int
355copy_comment (pfile, m)
356 cpp_reader *pfile;
357 int m;
358{
359 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
360 const U_CHAR *limit;
361
362 if (skip_comment (pfile, m) == m)
363 return m;
364
365 limit = CPP_BUFFER (pfile)->cur;
366 CPP_RESERVE (pfile, limit - start + 2);
367 CPP_PUTC_Q (pfile, m);
368 for (; start <= limit; start++)
369 if (*start != '\r')
370 CPP_PUTC_Q (pfile, *start);
371
372 return ' ';
373}
374
64aaf407
NB
375static void
376null_warning (pfile, count)
377 cpp_reader *pfile;
378 unsigned int count;
379{
380 if (count == 1)
381 cpp_warning (pfile, "embedded null character ignored");
382 else
383 cpp_warning (pfile, "embedded null characters ignored");
384}
385
45b966db
ZW
386/* Skip whitespace \-newline and comments. Does not macro-expand. */
387
388void
389_cpp_skip_hspace (pfile)
390 cpp_reader *pfile;
391{
64aaf407 392 unsigned int null_count = 0;
45b966db 393 int c;
64aaf407 394
45b966db
ZW
395 while (1)
396 {
397 c = GETC();
398 if (c == EOF)
64aaf407 399 goto out;
45b966db
ZW
400 else if (is_hspace(c))
401 {
402 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
403 cpp_pedwarn (pfile, "%s in preprocessing directive",
404 c == '\f' ? "formfeed" : "vertical tab");
64aaf407
NB
405 else if (c == '\0')
406 null_count++;
45b966db
ZW
407 }
408 else if (c == '\r')
409 {
410 /* \r is a backslash-newline marker if !has_escapes, and
411 a deletable-whitespace or no-reexpansion marker otherwise. */
412 if (CPP_BUFFER (pfile)->has_escapes)
413 {
414 if (PEEKC() == ' ')
415 FORWARD(1);
416 else
417 break;
418 }
419 else
420 CPP_BUMP_LINE (pfile);
421 }
422 else if (c == '/' || c == '-')
423 {
424 c = skip_comment (pfile, c);
425 if (c != ' ')
426 break;
427 }
428 else
429 break;
430 }
431 FORWARD(-1);
64aaf407
NB
432 out:
433 if (null_count)
434 null_warning (pfile, null_count);
45b966db
ZW
435}
436
437/* Read and discard the rest of the current line. */
438
439void
440_cpp_skip_rest_of_line (pfile)
441 cpp_reader *pfile;
442{
443 for (;;)
444 {
445 int c = GETC();
446 switch (c)
447 {
448 case '\n':
449 FORWARD(-1);
450 case EOF:
451 return;
452
453 case '\r':
454 if (! CPP_BUFFER (pfile)->has_escapes)
455 CPP_BUMP_LINE (pfile);
456 break;
457
458 case '\'':
459 case '\"':
460 skip_string (pfile, c);
461 break;
462
463 case '/':
464 case '-':
465 skip_comment (pfile, c);
466 break;
467
468 case '\f':
469 case '\v':
470 if (CPP_PEDANTIC (pfile))
471 cpp_pedwarn (pfile, "%s in preprocessing directive",
472 c == '\f' ? "formfeed" : "vertical tab");
473 break;
474
475 }
476 }
477}
478
479/* Parse an identifier starting with C. */
480
481void
482_cpp_parse_name (pfile, c)
483 cpp_reader *pfile;
484 int c;
485{
486 for (;;)
487 {
488 if (! is_idchar(c))
489 {
490 FORWARD (-1);
491 break;
492 }
493
494 if (c == '$' && CPP_PEDANTIC (pfile))
495 cpp_pedwarn (pfile, "`$' in identifier");
496
497 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
498 CPP_PUTC_Q (pfile, c);
499 c = GETC();
500 if (c == EOF)
501 break;
502 }
503 CPP_NUL_TERMINATE_Q (pfile);
504 return;
505}
506
507/* Parse and skip over a string starting with C. A single quoted
508 string is treated like a double -- some programs (e.g., troff) are
509 perverse this way. (However, a single quoted string is not allowed
510 to extend over multiple lines.) */
511static void
512skip_string (pfile, c)
513 cpp_reader *pfile;
514 int c;
515{
3a2b2c7a 516 unsigned int start_line, start_column;
64aaf407 517 unsigned int null_count = 0;
45b966db 518
3a2b2c7a
ZW
519 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
520 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
45b966db
ZW
521 while (1)
522 {
523 int cc = GETC();
524 switch (cc)
525 {
526 case EOF:
527 cpp_error_with_line (pfile, start_line, start_column,
528 "unterminated string or character constant");
529 if (pfile->multiline_string_line != start_line
530 && pfile->multiline_string_line != 0)
531 cpp_error_with_line (pfile,
532 pfile->multiline_string_line, -1,
533 "possible real start of unterminated constant");
534 pfile->multiline_string_line = 0;
64aaf407 535 goto out;
45b966db 536
64aaf407
NB
537 case '\0':
538 null_count++;
539 break;
540
45b966db
ZW
541 case '\n':
542 CPP_BUMP_LINE (pfile);
543 /* In Fortran and assembly language, silently terminate
544 strings of either variety at end of line. This is a
545 kludge around not knowing where comments are in these
546 languages. */
ae79697b
ZW
547 if (CPP_OPTION (pfile, lang_fortran)
548 || CPP_OPTION (pfile, lang_asm))
45b966db
ZW
549 {
550 FORWARD(-1);
64aaf407 551 goto out;
45b966db
ZW
552 }
553 /* Character constants may not extend over multiple lines.
554 In Standard C, neither may strings. We accept multiline
555 strings as an extension. */
556 if (c == '\'')
557 {
558 cpp_error_with_line (pfile, start_line, start_column,
559 "unterminated character constant");
560 FORWARD(-1);
64aaf407 561 goto out;
45b966db
ZW
562 }
563 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
564 cpp_pedwarn_with_line (pfile, start_line, start_column,
565 "string constant runs past end of line");
566 if (pfile->multiline_string_line == 0)
567 pfile->multiline_string_line = start_line;
568 break;
569
570 case '\r':
571 if (CPP_BUFFER (pfile)->has_escapes)
572 {
573 cpp_ice (pfile, "\\r escape inside string constant");
574 FORWARD(1);
575 }
576 else
577 /* Backslash newline is replaced by nothing at all. */
578 CPP_BUMP_LINE (pfile);
579 break;
580
581 case '\\':
582 FORWARD(1);
583 break;
584
585 case '\"':
586 case '\'':
587 if (cc == c)
64aaf407 588 goto out;
45b966db
ZW
589 break;
590 }
591 }
64aaf407
NB
592
593 out:
594 if (null_count == 1)
595 cpp_warning (pfile, "null character in string or character constant");
596 else if (null_count > 1)
597 cpp_warning (pfile, "null characters in string or character constant");
45b966db
ZW
598}
599
600/* Parse a string and copy it to the output. */
601
602static void
603parse_string (pfile, c)
604 cpp_reader *pfile;
605 int c;
606{
607 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
608 const U_CHAR *limit;
609
610 skip_string (pfile, c);
611
612 limit = CPP_BUFFER (pfile)->cur;
613 CPP_RESERVE (pfile, limit - start + 2);
614 CPP_PUTC_Q (pfile, c);
615 for (; start < limit; start++)
616 if (*start != '\r')
617 CPP_PUTC_Q (pfile, *start);
618}
619
620/* Read an assertion into the token buffer, converting to
621 canonical form: `#predicate(a n swe r)' The next non-whitespace
622 character to read should be the first letter of the predicate.
623 Returns 0 for syntax error, 1 for bare predicate, 2 for predicate
624 with answer (see callers for why). In case of 0, an error has been
625 printed. */
626int
627_cpp_parse_assertion (pfile)
628 cpp_reader *pfile;
629{
630 int c, dropwhite;
631 _cpp_skip_hspace (pfile);
632 c = PEEKC();
bfab56e7
ZW
633 if (c == '\n')
634 {
635 cpp_error (pfile, "assertion without predicate");
636 return 0;
637 }
638 else if (! is_idstart(c))
45b966db
ZW
639 {
640 cpp_error (pfile, "assertion predicate is not an identifier");
641 return 0;
642 }
643 CPP_PUTC(pfile, '#');
644 FORWARD(1);
645 _cpp_parse_name (pfile, c);
646
647 c = PEEKC();
648 if (c != '(')
649 {
650 if (is_hspace(c) || c == '\r')
651 _cpp_skip_hspace (pfile);
652 c = PEEKC();
653 }
654 if (c != '(')
655 return 1;
656
657 CPP_PUTC(pfile, '(');
658 FORWARD(1);
659 dropwhite = 1;
660 while ((c = GETC()) != ')')
661 {
662 if (is_space(c))
663 {
664 if (! dropwhite)
665 {
666 CPP_PUTC(pfile, ' ');
667 dropwhite = 1;
668 }
669 }
670 else if (c == '\n' || c == EOF)
671 {
672 if (c == '\n') FORWARD(-1);
673 cpp_error (pfile, "un-terminated assertion answer");
674 return 0;
675 }
676 else if (c == '\r')
677 /* \r cannot be a macro escape here. */
678 CPP_BUMP_LINE (pfile);
679 else
680 {
681 CPP_PUTC (pfile, c);
682 dropwhite = 0;
683 }
684 }
685
686 if (pfile->limit[-1] == ' ')
687 pfile->limit[-1] = ')';
688 else if (pfile->limit[-1] == '(')
689 {
690 cpp_error (pfile, "empty token sequence in assertion");
691 return 0;
692 }
693 else
694 CPP_PUTC (pfile, ')');
695
45b966db
ZW
696 return 2;
697}
698
699/* Get the next token, and add it to the text in pfile->token_buffer.
700 Return the kind of token we got.

   The token is chosen by switching on its first character.  No macro
   expansion and no directive processing happens here: identifiers are
   returned as CPP_MACRO and a '#' that is the first non-white thing on
   its line is returned as CPP_DIRECTIVE without anything being written
   for it.  CPP_EOF is returned when the current buffer has no more
   characters; the buffer is not popped.  */
701
3a2b2c7a 702enum cpp_ttype
45b966db
ZW
703_cpp_lex_token (pfile)
704 cpp_reader *pfile;
705{
706 register int c, c2, c3;
3a2b2c7a 707 enum cpp_ttype token;
45b966db
ZW
708
/* Re-entered whenever input was consumed without producing a token.  */
709 get_next:
710 c = GETC();
711 switch (c)
712 {
713 case EOF:
714 return CPP_EOF;
715
716 case '/':
717 if (PEEKC () == '=')
718 goto op2;
719
/* Also reached from the '-' case below for Chill "--" comments.  If C
   turns out not to start a comment it is treated as an ordinary
   character.  */
720 comment:
ae79697b 721 if (CPP_OPTION (pfile, discard_comments))
45b966db
ZW
722 c = skip_comment (pfile, c);
723 else
724 c = copy_comment (pfile, c);
725 if (c != ' ')
726 goto randomchar;
727
728 /* Comments are equivalent to spaces.
729 For -traditional, a comment is equivalent to nothing. */
ff2b53ef 730 if (!CPP_OPTION (pfile, discard_comments))
45b966db 731 return CPP_COMMENT;
ff2b53ef
ZW
732 else if (CPP_TRADITIONAL (pfile)
733 && ! is_space (PEEKC ()))
734 {
735 if (pfile->parsing_define_directive)
736 return CPP_COMMENT;
737 else
738 goto get_next;
739 }
45b966db
ZW
740 else
741 {
742 CPP_PUTC (pfile, c);
743 return CPP_HSPACE;
744 }
745
/* '#' has four meanings, tried in this order: an assertion inside #if,
   the stringize / token-paste operators inside #define (non-traditional
   only), an ordinary character when something other than whitespace
   precedes it on the line, and otherwise the start of a directive.  */
746 case '#':
747 if (pfile->parsing_if_directive)
748 {
bfab56e7
ZW
749 if (_cpp_parse_assertion (pfile))
750 return CPP_ASSERTION;
751 goto randomchar;
45b966db
ZW
752 }
753
754 if (pfile->parsing_define_directive && ! CPP_TRADITIONAL (pfile))
755 {
756 CPP_RESERVE (pfile, 3);
757 CPP_PUTC_Q (pfile, '#');
758 CPP_NUL_TERMINATE_Q (pfile);
759 if (PEEKC () != '#')
760 return CPP_STRINGIZE;
761
762 FORWARD (1);
763 CPP_PUTC_Q (pfile, '#');
764 CPP_NUL_TERMINATE_Q (pfile);
765 return CPP_TOKPASTE;
766 }
767
768 if (!pfile->only_seen_white)
769 goto randomchar;
45b966db
ZW
770 return CPP_DIRECTIVE;
771
772 case '\"':
773 case '\'':
774 parse_string (pfile, c);
45b966db
ZW
775 return c == '\'' ? CPP_CHAR : CPP_STRING;
776
777 case '$':
ae79697b 778 if (!CPP_OPTION (pfile, dollars_in_ident))
45b966db
ZW
779 goto randomchar;
780 goto letter;
781
782 case ':':
ae79697b 783 if (CPP_OPTION (pfile, cplusplus) && PEEKC () == ':')
45b966db
ZW
784 goto op2;
785 goto randomchar;
786
787 case '&':
788 case '+':
789 case '|':
790 c2 = PEEKC ();
791 if (c2 == c || c2 == '=')
792 goto op2;
793 goto randomchar;
794
795 case '*':
796 case '!':
797 case '%':
798 case '=':
799 case '^':
800 if (PEEKC () == '=')
801 goto op2;
802 goto randomchar;
803
804 case '-':
805 c2 = PEEKC ();
806 if (c2 == '-')
807 {
ae79697b 808 if (CPP_OPTION (pfile, chill))
45b966db
ZW
809 goto comment; /* Chill style comment */
810 else
811 goto op2;
812 }
813 else if (c2 == '=')
814 goto op2;
815 else if (c2 == '>')
816 {
ae79697b 817 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
45b966db
ZW
818 {
819 /* In C++, there's a ->* operator. */
820 token = CPP_OTHER;
45b966db
ZW
821 CPP_RESERVE (pfile, 4);
822 CPP_PUTC_Q (pfile, c);
823 CPP_PUTC_Q (pfile, GETC ());
824 CPP_PUTC_Q (pfile, GETC ());
825 CPP_NUL_TERMINATE_Q (pfile);
826 return token;
827 }
828 goto op2;
829 }
830 goto randomchar;
831
832 case '<':
833 if (pfile->parsing_include_directive)
834 {
/* Copy everything up to and including the closing '>' as one
   CPP_STRING token, deleting \r markers along the way.  */
835 for (;;)
836 {
837 CPP_PUTC (pfile, c);
838 if (c == '>')
839 break;
840 c = GETC ();
841 if (c == '\n' || c == EOF)
842 {
843 cpp_error (pfile,
844 "missing '>' in `#include <FILENAME>'");
845 break;
846 }
847 else if (c == '\r')
848 {
849 if (!CPP_BUFFER (pfile)->has_escapes)
850 {
851 /* Backslash newline is replaced by nothing. */
852 CPP_ADJUST_WRITTEN (pfile, -1);
853 CPP_BUMP_LINE (pfile);
854 }
855 else
856 {
857 /* We might conceivably get \r- or \r<space> in
858 here. Just delete 'em. */
859 int d = GETC();
860 if (d != '-' && d != ' ')
861 cpp_ice (pfile, "unrecognized escape \\r%c", d);
862 CPP_ADJUST_WRITTEN (pfile, -1);
863 }
864 }
865 }
866 return CPP_STRING;
867 }
868 /* else fall through */
869 case '>':
870 c2 = PEEKC ();
871 if (c2 == '=')
872 goto op2;
873 /* GNU C++ supports MIN and MAX operators <? and >?. */
ae79697b 874 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
45b966db
ZW
875 goto randomchar;
876 FORWARD(1);
877 CPP_RESERVE (pfile, 4);
878 CPP_PUTC (pfile, c);
879 CPP_PUTC (pfile, c2);
880 c3 = PEEKC ();
881 if (c3 == '=')
882 CPP_PUTC_Q (pfile, GETC ());
883 CPP_NUL_TERMINATE_Q (pfile);
45b966db
ZW
884 return CPP_OTHER;
885
886 case '.':
887 c2 = PEEKC ();
888 if (ISDIGIT(c2))
889 {
890 CPP_RESERVE(pfile, 2);
891 CPP_PUTC_Q (pfile, '.');
892 c = GETC ();
893 goto number;
894 }
895
896 /* In C++ there's a .* operator. */
ae79697b 897 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
45b966db
ZW
898 goto op2;
899
900 if (c2 == '.' && PEEKN(1) == '.')
901 {
902 CPP_RESERVE(pfile, 4);
903 CPP_PUTC_Q (pfile, '.');
904 CPP_PUTC_Q (pfile, '.');
905 CPP_PUTC_Q (pfile, '.');
906 FORWARD (2);
907 CPP_NUL_TERMINATE_Q (pfile);
45b966db
ZW
908 return CPP_3DOTS;
909 }
910 goto randomchar;
911
/* Two-character operator: C is its first character and the second one
   is still unread.  Both are written and the pair is returned as
   CPP_OTHER.  */
912 op2:
913 token = CPP_OTHER;
45b966db
ZW
914 CPP_RESERVE(pfile, 3);
915 CPP_PUTC_Q (pfile, c);
916 CPP_PUTC_Q (pfile, GETC ());
917 CPP_NUL_TERMINATE_Q (pfile);
918 return token;
919
920 case 'L':
921 c2 = PEEKC ();
922 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
923 {
924 CPP_PUTC (pfile, c);
925 c = GETC ();
926 parse_string (pfile, c);
45b966db
ZW
927 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
928 }
929 goto letter;
930
931 case '0': case '1': case '2': case '3': case '4':
932 case '5': case '6': case '7': case '8': case '9':
/* C2 remembers the previously accepted character, so that a '+' or '-'
   is taken as part of the number only directly after 'e'/'E', or after
   'p'/'P' when the c89 option is off.  */
933 number:
934 c2 = '.';
935 for (;;)
936 {
937 CPP_RESERVE (pfile, 2);
938 CPP_PUTC_Q (pfile, c);
939 c = PEEKC ();
940 if (c == EOF)
941 break;
942 if (!is_numchar(c) && c != '.'
943 && ((c2 != 'e' && c2 != 'E'
944 && ((c2 != 'p' && c2 != 'P')
ae79697b 945 || CPP_OPTION (pfile, c89)))
45b966db
ZW
946 || (c != '+' && c != '-')))
947 break;
948 FORWARD(1);
949 c2= c;
950 }
951 CPP_NUL_TERMINATE_Q (pfile);
45b966db
ZW
952 return CPP_NUMBER;
/* In Chill mode one of these letters directly followed by a quote
   starts a quoted literal; otherwise the letter begins an ordinary
   identifier.  */
953 case 'b': case 'c': case 'd': case 'h': case 'o':
954 case 'B': case 'C': case 'D': case 'H': case 'O':
ae79697b 955 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
45b966db 956 {
45b966db
ZW
957 CPP_RESERVE (pfile, 2);
958 CPP_PUTC_Q (pfile, c);
959 CPP_PUTC_Q (pfile, '\'');
960 FORWARD(1);
961 for (;;)
962 {
963 c = GETC();
964 if (c == EOF)
965 goto chill_number_eof;
966 if (!is_numchar(c))
967 break;
968 CPP_PUTC (pfile, c);
969 }
970 if (c == '\'')
971 {
972 CPP_RESERVE (pfile, 2);
973 CPP_PUTC_Q (pfile, c);
974 CPP_NUL_TERMINATE_Q (pfile);
975 return CPP_STRING;
976 }
977 else
978 {
979 FORWARD(-1);
980 chill_number_eof:
981 CPP_NUL_TERMINATE (pfile);
982 return CPP_NUMBER;
983 }
984 }
985 else
986 goto letter;
987 case '_':
988 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
989 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
990 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
991 case 'x': case 'y': case 'z':
992 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
993 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
994 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
995 case 'Y': case 'Z':
/* Identifiers are returned as CPP_MACRO; whether to expand them is
   left to the caller.  */
996 letter:
45b966db
ZW
997 _cpp_parse_name (pfile, c);
998 return CPP_MACRO;
999
64aaf407
NB
/* A run of horizontal whitespace becomes one CPP_HSPACE token.  Null
   characters in the run are counted and warned about instead of being
   copied.  */
1000 case ' ': case '\t': case '\v': case '\f': case '\0':
1001 {
1002 int null_count = 0;
1003
1004 for (;;)
1005 {
1006 if (c == '\0')
1007 null_count++;
1008 else
1009 CPP_PUTC (pfile, c);
1010 c = PEEKC ();
1011 if (c == EOF || !is_hspace(c))
1012 break;
1013 FORWARD(1);
1014 }
1015 if (null_count)
1016 null_warning (pfile, null_count);
1017 return CPP_HSPACE;
1018 }
45b966db
ZW
1019
/* In a buffer with has_escapes set, \r introduces a two-character
   escape ("\r-" or "\r ").  In any other buffer it is treated as a
   backslash-newline: the line count is bumped and lexing continues.  */
1020 case '\r':
1021 if (CPP_BUFFER (pfile)->has_escapes)
1022 {
1023 c = GETC ();
1024 if (c == '-')
1025 {
1026 if (pfile->output_escapes)
1027 CPP_PUTS (pfile, "\r-", 2);
1028 _cpp_parse_name (pfile, GETC ());
1029 return CPP_NAME;
1030 }
1031 else if (c == ' ')
1032 {
ff2b53ef
ZW
1033 /* "\r " means a space, but only if necessary to prevent
1034 accidental token concatenation. */
45b966db
ZW
1035 CPP_RESERVE (pfile, 2);
1036 if (pfile->output_escapes)
1037 CPP_PUTC_Q (pfile, '\r');
1038 CPP_PUTC_Q (pfile, c);
1039 return CPP_HSPACE;
1040 }
1041 else
1042 {
1043 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1044 goto get_next;
1045 }
1046 }
1047 else
1048 {
1049 /* Backslash newline is ignored. */
1050 CPP_BUMP_LINE (pfile);
1051 goto get_next;
1052 }
1053
1054 case '\n':
1055 CPP_PUTC (pfile, c);
45b966db
ZW
1056 return CPP_VSPACE;
1057
1058 case '(': token = CPP_LPAREN; goto char1;
1059 case ')': token = CPP_RPAREN; goto char1;
1060 case '{': token = CPP_LBRACE; goto char1;
1061 case '}': token = CPP_RBRACE; goto char1;
1062 case ',': token = CPP_COMMA; goto char1;
1063 case ';': token = CPP_SEMICOLON; goto char1;
1064
/* Anything not recognized above is emitted as a single-character
   CPP_OTHER token.  */
1065 randomchar:
1066 default:
1067 token = CPP_OTHER;
1068 char1:
45b966db
ZW
1069 CPP_PUTC (pfile, c);
1070 return token;
1071 }
1072}
1073
1074/* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1075 Caller is expected to have checked no_macro_expand. */
1076static int
1077maybe_macroexpand (pfile, written)
1078 cpp_reader *pfile;
1079 long written;
1080{
1081 U_CHAR *macro = pfile->token_buffer + written;
1082 size_t len = CPP_WRITTEN (pfile) - written;
1083 HASHNODE *hp = _cpp_lookup (pfile, macro, len);
1084
1085 if (!hp)
1086 return 0;
1087 if (hp->type == T_DISABLED)
1088 {
1089 if (pfile->output_escapes)
1090 {
1091 /* Insert a no-reexpand marker before IDENT. */
1092 CPP_RESERVE (pfile, 2);
1093 CPP_ADJUST_WRITTEN (pfile, 2);
1094 macro = pfile->token_buffer + written;
1095
1096 memmove (macro + 2, macro, len);
1097 macro[0] = '\r';
1098 macro[1] = '-';
1099 }
1100 return 0;
1101 }
ff2b53ef
ZW
1102 if (hp->type == T_EMPTY)
1103 {
1104 /* Special case optimization: macro expands to nothing. */
1105 CPP_SET_WRITTEN (pfile, written);
1106 CPP_PUTC_Q (pfile, ' ');
1107 return 1;
1108 }
45b966db
ZW
1109
1110 /* If macro wants an arglist, verify that a '(' follows. */
1111 if (hp->type == T_MACRO && hp->value.defn->nargs >= 0)
1112 {
1113 int macbuf_whitespace = 0;
1114 int c;
1115
1116 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1117 {
1118 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1119 for (;;)
1120 {
1121 _cpp_skip_hspace (pfile);
1122 c = PEEKC ();
1123 if (c == '\n')
1124 FORWARD(1);
1125 else
1126 break;
1127 }
1128 if (point != CPP_BUFFER (pfile)->cur)
1129 macbuf_whitespace = 1;
1130 if (c == '(')
1131 goto is_macro_call;
1132 else if (c != EOF)
1133 goto not_macro_call;
1134 cpp_pop_buffer (pfile);
1135 }
1136
1137 CPP_SET_MARK (pfile);
1138 for (;;)
1139 {
1140 _cpp_skip_hspace (pfile);
1141 c = PEEKC ();
1142 if (c == '\n')
1143 FORWARD(1);
1144 else
1145 break;
1146 }
1147 CPP_GOTO_MARK (pfile);
1148
1149 if (c != '(')
1150 {
1151 not_macro_call:
1152 if (macbuf_whitespace)
1153 CPP_PUTC (pfile, ' ');
1154 return 0;
1155 }
1156 }
1157
1158 is_macro_call:
1159 /* This is now known to be a macro call.
1160 Expand the macro, reading arguments as needed,
1161 and push the expansion on the input stack. */
1162 _cpp_macroexpand (pfile, hp);
1163 CPP_SET_WRITTEN (pfile, written);
1164 return 1;
1165}
1166
3a2b2c7a 1167enum cpp_ttype
45b966db
ZW
1168cpp_get_token (pfile)
1169 cpp_reader *pfile;
1170{
3a2b2c7a 1171 enum cpp_ttype token;
45b966db
ZW
1172 long written = CPP_WRITTEN (pfile);
1173
1174 get_next:
1175 token = _cpp_lex_token (pfile);
1176
1177 switch (token)
1178 {
1179 default:
ff2b53ef
ZW
1180 pfile->potential_control_macro = 0;
1181 pfile->only_seen_white = 0;
1182 return token;
1183
1184 case CPP_VSPACE:
1185 if (pfile->only_seen_white == 0)
1186 pfile->only_seen_white = 1;
1187 CPP_BUMP_LINE (pfile);
1188 if (! CPP_OPTION (pfile, no_line_commands))
1189 {
1190 pfile->lineno++;
1191 if (CPP_BUFFER (pfile)->lineno != pfile->lineno)
1192 _cpp_output_line_command (pfile, same_file);
1193 }
1194 return token;
1195
1196 case CPP_HSPACE:
1197 case CPP_COMMENT:
45b966db
ZW
1198 return token;
1199
1200 case CPP_DIRECTIVE:
ff2b53ef 1201 pfile->potential_control_macro = 0;
45b966db
ZW
1202 if (_cpp_handle_directive (pfile))
1203 return CPP_DIRECTIVE;
1204 pfile->only_seen_white = 0;
1205 CPP_PUTC (pfile, '#');
1206 return CPP_OTHER;
1207
1208 case CPP_MACRO:
ff2b53ef
ZW
1209 pfile->potential_control_macro = 0;
1210 pfile->only_seen_white = 0;
45b966db
ZW
1211 if (! pfile->no_macro_expand
1212 && maybe_macroexpand (pfile, written))
1213 goto get_next;
1214 return CPP_NAME;
1215
1216 case CPP_EOF:
1217 if (CPP_BUFFER (pfile)->manual_pop)
1218 /* If we've been reading from redirected input, the
1219 frontend will pop the buffer. */
1220 return CPP_EOF;
1221 else if (CPP_BUFFER (pfile)->seen_eof)
1222 {
1223 if (CPP_PREV_BUFFER (CPP_BUFFER (pfile)) == NULL)
1224 return CPP_EOF;
1225
1226 cpp_pop_buffer (pfile);
1227 goto get_next;
1228 }
1229 else
1230 {
1231 _cpp_handle_eof (pfile);
1232 return CPP_POP;
1233 }
1234 }
1235}
1236
1237/* Like cpp_get_token, but skip spaces and comments. */
1238
3a2b2c7a 1239enum cpp_ttype
45b966db
ZW
1240cpp_get_non_space_token (pfile)
1241 cpp_reader *pfile;
1242{
1243 int old_written = CPP_WRITTEN (pfile);
1244 for (;;)
1245 {
3a2b2c7a 1246 enum cpp_ttype token = cpp_get_token (pfile);
ff2b53ef 1247 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
45b966db
ZW
1248 return token;
1249 CPP_SET_WRITTEN (pfile, old_written);
1250 }
1251}
1252
ff2b53ef
ZW
1253/* Like cpp_get_token, except that it does not execute directives,
1254 does not consume vertical space, and automatically pops off macro
1255 buffers.
45b966db 1256
ff2b53ef
ZW
1257 XXX This function will exist only till collect_expansion doesn't
1258 need to see whitespace anymore, then it'll be merged with
1259 _cpp_get_directive_token (below). */
3a2b2c7a 1260enum cpp_ttype
ff2b53ef 1261_cpp_get_define_token (pfile)
45b966db
ZW
1262 cpp_reader *pfile;
1263{
ff2b53ef 1264 long old_written;
3a2b2c7a 1265 enum cpp_ttype token;
45b966db 1266
ff2b53ef
ZW
1267 get_next:
1268 old_written = CPP_WRITTEN (pfile);
1269 token = _cpp_lex_token (pfile);
1270 switch (token)
45b966db 1271 {
ff2b53ef
ZW
1272 default:
1273 return token;
45b966db 1274
ff2b53ef
ZW
1275 case CPP_VSPACE:
1276 /* Put it back and return VSPACE. */
1277 FORWARD(-1);
1278 CPP_ADJUST_WRITTEN (pfile, -1);
1279 return CPP_VSPACE;
45b966db 1280
ff2b53ef
ZW
1281 case CPP_HSPACE:
1282 if (CPP_PEDANTIC (pfile))
45b966db 1283 {
ff2b53ef
ZW
1284 U_CHAR *p, *limit;
1285 p = pfile->token_buffer + old_written;
1286 limit = CPP_PWRITTEN (pfile);
1287 while (p < limit)
1288 {
1289 if (*p == '\v' || *p == '\f')
1290 cpp_pedwarn (pfile, "%s in preprocessing directive",
1291 *p == '\f' ? "formfeed" : "vertical tab");
1292 p++;
1293 }
45b966db 1294 }
ff2b53ef 1295 return CPP_HSPACE;
45b966db 1296
ff2b53ef
ZW
1297 case CPP_DIRECTIVE:
1298 /* Don't execute the directive, but don't smash it to OTHER either. */
1299 CPP_PUTC (pfile, '#');
1300 return CPP_DIRECTIVE;
1301
1302 case CPP_MACRO:
1303 if (! pfile->no_macro_expand
1304 && maybe_macroexpand (pfile, old_written))
1305 goto get_next;
1306 return CPP_NAME;
45b966db 1307
ff2b53ef
ZW
1308 case CPP_EOF:
1309 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
45b966db 1310 {
ff2b53ef
ZW
1311 cpp_pop_buffer (pfile);
1312 goto get_next;
45b966db 1313 }
ff2b53ef
ZW
1314 else
1315 /* This can happen for files that don't end with a newline,
1316 and for cpp_define and friends. Pretend they do, so
1317 callers don't have to deal. A warning will be issued by
1318 someone else, if necessary. */
1319 return CPP_VSPACE;
1320 }
1321}
1322
1323/* Just like _cpp_get_define_token except that it discards horizontal
1324 whitespace. */
45b966db 1325
3a2b2c7a 1326enum cpp_ttype
ff2b53ef
ZW
1327_cpp_get_directive_token (pfile)
1328 cpp_reader *pfile;
1329{
1330 int old_written = CPP_WRITTEN (pfile);
1331 for (;;)
1332 {
3a2b2c7a 1333 enum cpp_ttype token = _cpp_get_define_token (pfile);
ff2b53ef
ZW
1334 if (token != CPP_COMMENT && token != CPP_HSPACE)
1335 return token;
1336 CPP_SET_WRITTEN (pfile, old_written);
45b966db
ZW
1337 }
1338}
1339
1340/* Determine the current line and column. Used only by read_and_prescan. */
1341static U_CHAR *
1342find_position (start, limit, linep)
1343 U_CHAR *start;
1344 U_CHAR *limit;
1345 unsigned long *linep;
1346{
1347 unsigned long line = *linep;
1348 U_CHAR *lbase = start;
1349 while (start < limit)
1350 {
1351 U_CHAR ch = *start++;
1352 if (ch == '\n' || ch == '\r')
1353 {
1354 line++;
1355 lbase = start;
1356 }
1357 }
1358 *linep = line;
1359 return lbase;
1360}
1361
2a87fbe8
ZW
/* Character table used by _cpp_read_and_prescan.  When designated
   initializers are available (GCC_VERSION >= 2007, or C99) the table
   is constant data and init_chartab () expands to nothing; otherwise
   the table starts out zeroed and init_chartab () fills it in at run
   time (it is called from _cpp_init_input_buffer).  */

#ifndef UCHAR_MAX
#define UCHAR_MAX 255	/* assume 8-bit bytes */
#endif

/* Table values for the characters that can't be handled in the inner
   loop.  Any larger nonzero value in the table is instead the
   replacement for a trigraph whose third character is the index.  */
#define SPECCASE_CR        1
#define SPECCASE_BACKSLASH 2
#define SPECCASE_QUESTION  3

#if (GCC_VERSION >= 2007) || (__STDC_VERSION__ >= 199901L)
#define init_chartab() /* nothing */
#define CHARTAB static const unsigned char chartab[UCHAR_MAX + 1] = {
#define END };
#define s(p, v) [p] = v,
#else
#define CHARTAB static unsigned char chartab[UCHAR_MAX + 1] = { 0 }; \
 static void init_chartab PARAMS ((void)) { \
 unsigned char *tab = chartab;
#define END }
#define s(p, v) tab[p] = v;
#endif

CHARTAB
  /* Characters needing special treatment.  */
  s('\r', SPECCASE_CR)
  s('\\', SPECCASE_BACKSLASH)
  s('?',  SPECCASE_QUESTION)

  /* Trigraph third character -> replacement character.  */
  s('=',  '#')
  s(')',  ']')
  s('!',  '|')
  s('(',  '[')
  s('\'', '^')
  s('>',  '}')
  s('/',  '\\')
  s('<',  '{')
  s('-',  '~')
END

#undef CHARTAB
#undef END
#undef s

/* NORMAL (c): character C needs no special treatment in the copy
   loops (its entry is zero or a trigraph replacement).  Note that C
   is evaluated twice.
   NONTRI (t): table value T is not a trigraph replacement.  */
#define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
#define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1406
45b966db
ZW
1407/* Read the entire contents of file DESC into buffer BUF. LEN is how
1408 much memory to allocate initially; more will be allocated if
1409 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1410 canonical form (\n). If enabled, convert and/or warn about
1411 trigraphs. Convert backslash-newline to a one-character escape
1412 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1413 token). If there is no newline at the end of the file, add one and
1414 warn. Returns -1 on failure, or the actual length of the data to
1415 be scanned.
1416
1417 This function does a lot of work, and can be a serious performance
1418 bottleneck. It has been tuned heavily; make sure you understand it
1419 before hacking. The common case - no trigraphs, Unix style line
1420 breaks, backslash-newline set off by whitespace, newline at EOF -
1421 has been optimized at the expense of the others. The performance
1422 penalty for DOS style line breaks (\r\n) is about 15%.
1423
1424 Warnings lose particularly heavily since we have to determine the
1425 line number, which involves scanning from the beginning of the file
1426 or from the last warning. The penalty for the absence of a newline
1427 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1428
1429 If your file has more than one kind of end-of-line marker, you
04e3ec78
NB
1430 will get messed-up line numbering.
1431
1432 So that the cases of the switch statement do not have to concern
1433 themselves with the complications of reading beyond the end of the
1434 buffer, the buffer is guaranteed to have at least 3 characters in
1435 it (or however many are left in the file, if less) on entry to the
1436 switch. This is enough to handle trigraphs and the "\\\n\r" and
1437 "\\\r\n" cases.
1438
1439 The end of the buffer is marked by a '\\', which, being a special
1440 character, guarantees we will exit the fast-scan loops and perform
1441 a refill. */
46d07497 1442
45b966db
ZW
1443long
1444_cpp_read_and_prescan (pfile, fp, desc, len)
1445 cpp_reader *pfile;
1446 cpp_buffer *fp;
1447 int desc;
1448 size_t len;
1449{
1450 U_CHAR *buf = (U_CHAR *) xmalloc (len);
1451 U_CHAR *ip, *op, *line_base;
1452 U_CHAR *ibase;
45b966db
ZW
1453 unsigned long line;
1454 unsigned int deferred_newlines;
45b966db 1455 size_t offset;
04e3ec78 1456 int count = 0;
45b966db
ZW
1457
1458 offset = 0;
04e3ec78 1459 deferred_newlines = 0;
45b966db
ZW
1460 op = buf;
1461 line_base = buf;
1462 line = 1;
04e3ec78
NB
1463 ibase = pfile->input_buffer + 3;
1464 ip = ibase;
1465 ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
45b966db
ZW
1466
1467 for (;;)
1468 {
04e3ec78
NB
1469 U_CHAR *near_buff_end;
1470
1471 /* Copy previous char plus unprocessed (at most 2) chars
1472 to beginning of buffer, refill it with another
1473 read(), and continue processing */
1474 memcpy(ip - count - 1, ip - 1, 3);
1475 ip -= count;
45b966db 1476
04e3ec78 1477 count = read (desc, ibase, pfile->input_buffer_len);
45b966db
ZW
1478 if (count < 0)
1479 goto error;
04e3ec78
NB
1480
1481 ibase[count] = '\\'; /* Marks end of buffer */
1482 if (count)
45b966db 1483 {
04e3ec78
NB
1484 near_buff_end = pfile->input_buffer + count;
1485 offset += count;
45b966db 1486 if (offset > len)
04e3ec78
NB
1487 {
1488 size_t delta_op;
1489 size_t delta_line_base;
1490 len *= 2;
1491 if (offset > len)
1492 /* len overflowed.
1493 This could happen if the file is larger than half the
1494 maximum address space of the machine. */
1495 goto too_big;
1496
1497 delta_op = op - buf;
1498 delta_line_base = line_base - buf;
1499 buf = (U_CHAR *) xrealloc (buf, len);
1500 op = buf + delta_op;
1501 line_base = buf + delta_line_base;
1502 }
1503 }
1504 else
1505 {
1506 if (ip == ibase)
1507 break;
1508 /* Allow normal processing of the (at most 2) remaining
1509 characters. The end-of-buffer marker is still present
1510 and prevents false matches within the switch. */
1511 near_buff_end = ibase - 1;
45b966db
ZW
1512 }
1513
1514 for (;;)
1515 {
04e3ec78 1516 unsigned int span;
45b966db 1517
04e3ec78 1518 /* Deal with \-newline, potentially in the middle of a token. */
45b966db
ZW
1519 if (deferred_newlines)
1520 {
2a87fbe8 1521 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
04e3ec78
NB
1522 {
1523 /* Previous was not white space. Skip to white
1524 space, if we can, before outputting the \r's */
1525 span = 0;
1526 while (ip[span] != ' '
1527 && ip[span] != '\t'
1528 && ip[span] != '\n'
2a87fbe8 1529 && NORMAL(ip[span]))
04e3ec78
NB
1530 span++;
1531 memcpy (op, ip, span);
1532 op += span;
1533 ip += span;
2a87fbe8 1534 if (! NORMAL(ip[0]))
04e3ec78
NB
1535 goto do_speccase;
1536 }
1537 while (deferred_newlines)
1538 deferred_newlines--, *op++ = '\r';
45b966db
ZW
1539 }
1540
1541 /* Copy as much as we can without special treatment. */
04e3ec78 1542 span = 0;
2a87fbe8 1543 while (NORMAL (ip[span])) span++;
45b966db
ZW
1544 memcpy (op, ip, span);
1545 op += span;
1546 ip += span;
1547
04e3ec78
NB
1548 do_speccase:
1549 if (ip > near_buff_end) /* Do we have enough chars? */
1550 break;
2a87fbe8 1551 switch (chartab[*ip++])
45b966db 1552 {
45b966db 1553 case SPECCASE_CR: /* \r */
04e3ec78 1554 if (ip[-2] != '\n')
45b966db 1555 {
04e3ec78
NB
1556 if (*ip == '\n')
1557 ip++;
1558 *op++ = '\n';
45b966db 1559 }
45b966db
ZW
1560 break;
1561
1562 case SPECCASE_BACKSLASH: /* \ */
04e3ec78 1563 if (*ip == '\n')
45b966db 1564 {
04e3ec78 1565 deferred_newlines++;
45b966db
ZW
1566 ip++;
1567 if (*ip == '\r') ip++;
45b966db
ZW
1568 }
1569 else if (*ip == '\r')
1570 {
04e3ec78 1571 deferred_newlines++;
45b966db
ZW
1572 ip++;
1573 if (*ip == '\n') ip++;
45b966db
ZW
1574 }
1575 else
1576 *op++ = '\\';
04e3ec78 1577 break;
45b966db
ZW
1578
1579 case SPECCASE_QUESTION: /* ? */
1580 {
1581 unsigned int d, t;
04e3ec78
NB
1582
1583 *op++ = '?'; /* Normal non-trigraph case */
1584 if (ip[0] != '?')
1585 break;
1586
45b966db 1587 d = ip[1];
2a87fbe8
ZW
1588 t = chartab[d];
1589 if (NONTRI (t))
04e3ec78 1590 break;
45b966db 1591
ae79697b 1592 if (CPP_OPTION (pfile, warn_trigraphs))
45b966db
ZW
1593 {
1594 unsigned long col;
1595 line_base = find_position (line_base, op, &line);
1596 col = op - line_base + 1;
ae79697b 1597 if (CPP_OPTION (pfile, trigraphs))
45b966db 1598 cpp_warning_with_line (pfile, line, col,
04e3ec78 1599 "trigraph ??%c converted to %c", d, t);
45b966db
ZW
1600 else
1601 cpp_warning_with_line (pfile, line, col,
04e3ec78 1602 "trigraph ??%c ignored", d);
45b966db 1603 }
04e3ec78
NB
1604
1605 ip += 2;
ae79697b 1606 if (CPP_OPTION (pfile, trigraphs))
45b966db 1607 {
04e3ec78 1608 op[-1] = t; /* Overwrite '?' */
45b966db 1609 if (t == '\\')
04e3ec78
NB
1610 {
1611 op--;
1612 *--ip = '\\';
1613 goto do_speccase; /* May need buffer refill */
1614 }
45b966db
ZW
1615 }
1616 else
1617 {
45b966db
ZW
1618 *op++ = '?';
1619 *op++ = d;
1620 }
1621 }
04e3ec78 1622 break;
45b966db
ZW
1623 }
1624 }
1625 }
1626
1627 if (offset == 0)
1628 return 0;
1629
45b966db
ZW
1630 if (op[-1] != '\n')
1631 {
1632 unsigned long col;
1633 line_base = find_position (line_base, op, &line);
1634 col = op - line_base + 1;
1635 cpp_warning_with_line (pfile, line, col, "no newline at end of file\n");
1636 if (offset + 1 > len)
1637 {
1638 len += 1;
1639 if (offset + 1 > len)
1640 goto too_big;
1641 buf = (U_CHAR *) xrealloc (buf, len);
1642 op = buf + offset;
1643 }
1644 *op++ = '\n';
1645 }
1646
1647 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
1648 return op - buf;
1649
1650 too_big:
1651 cpp_error (pfile, "file is too large (>%lu bytes)\n", (unsigned long)offset);
1652 free (buf);
1653 return -1;
1654
1655 error:
1656 cpp_error_from_errno (pfile, fp->ihash->name);
1657 free (buf);
1658 return -1;
1659}
1660
2a87fbe8
ZW
1661/* Allocate pfile->input_buffer, and initialize chartab[]
1662 if it hasn't happened already. */
46d07497 1663
45b966db
ZW
1664void
1665_cpp_init_input_buffer (pfile)
1666 cpp_reader *pfile;
1667{
1668 U_CHAR *tmp;
1669
2a87fbe8 1670 init_chartab ();
04e3ec78 1671
45b966db
ZW
1672 /* Determine the appropriate size for the input buffer. Normal C
1673 source files are smaller than eight K. */
04e3ec78
NB
1674 /* 8Kbytes of buffer proper, 1 to detect running off the end without
1675 address arithmetic all the time, and 3 for pushback during buffer
1676 refill, in case there's a potential trigraph or end-of-line
1677 digraph at the end of a block. */
45b966db 1678
04e3ec78 1679 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
45b966db
ZW
1680 pfile->input_buffer = tmp;
1681 pfile->input_buffer_len = 8192;
1682}
This page took 0.254966 seconds and 5 git commands to generate.