]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
Daily bump.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7
8This program is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
10Free Software Foundation; either version 2, or (at your option) any
11later version.
12
13This program is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with this program; if not, write to the Free Software
20Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
21
22#include "config.h"
23#include "system.h"
24#include "intl.h"
25#include "cpplib.h"
26#include "cpphash.h"
27
ff2b53ef
ZW
/* Raw accessors for a buffer's read pointer (`cur') and limit
   (`rlimit').  Each macro may evaluate BUFFER more than once, so pass
   an expression without side effects.  */

/* The character N positions ahead of the read pointer, or EOF when
   fewer than N + 1 characters remain.  Does not advance.  */
#define PEEKBUF(BUFFER, N) \
  ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)

/* Fetch the next character and advance past it; once the limit has
   been reached, yield EOF and leave the read pointer alone.  */
#define GETBUF(BUFFER) \
  ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)

/* Move the read pointer by N characters (callers also pass negative
   N to un-read).  No bounds checking is done.  */
#define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))

/* The same operations applied to the current buffer of the reader
   named `pfile' at the point of use.  */
#define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
#define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
#define GETC() GETBUF (CPP_BUFFER (pfile))
#define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
45b966db
ZW
38
39static void skip_block_comment PARAMS ((cpp_reader *));
40static void skip_line_comment PARAMS ((cpp_reader *));
41static int maybe_macroexpand PARAMS ((cpp_reader *, long));
42static int skip_comment PARAMS ((cpp_reader *, int));
43static int copy_comment PARAMS ((cpp_reader *, int));
44static void skip_string PARAMS ((cpp_reader *, int));
45static void parse_string PARAMS ((cpp_reader *, int));
46static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
47static int null_cleanup PARAMS ((cpp_buffer *, cpp_reader *));
64aaf407 48static void null_warning PARAMS ((cpp_reader *, unsigned int));
45b966db
ZW
49
50/* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
51
52void
53_cpp_grow_token_buffer (pfile, n)
54 cpp_reader *pfile;
55 long n;
56{
57 long old_written = CPP_WRITTEN (pfile);
58 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
59 pfile->token_buffer = (U_CHAR *)
60 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
61 CPP_SET_WRITTEN (pfile, old_written);
62}
63
64static int
65null_cleanup (pbuf, pfile)
66 cpp_buffer *pbuf ATTRIBUTE_UNUSED;
67 cpp_reader *pfile ATTRIBUTE_UNUSED;
68{
69 return 0;
70}
71
72/* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
73 If BUFFER != NULL, then use the LENGTH characters in BUFFER
74 as the new input buffer.
75 Return the new buffer, or NULL on failure. */
76
77cpp_buffer *
78cpp_push_buffer (pfile, buffer, length)
79 cpp_reader *pfile;
80 const U_CHAR *buffer;
81 long length;
82{
83 cpp_buffer *buf = CPP_BUFFER (pfile);
84 cpp_buffer *new;
85 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
86 {
87 cpp_fatal (pfile, "macro or `#include' recursion too deep");
88 return NULL;
89 }
90
91 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
92
93 new->if_stack = pfile->if_stack;
94 new->cleanup = null_cleanup;
95 new->buf = new->cur = buffer;
ff2b53ef 96 new->rlimit = buffer + length;
45b966db 97 new->prev = buf;
ff2b53ef 98 new->mark = NULL;
45b966db
ZW
99 new->line_base = NULL;
100
101 CPP_BUFFER (pfile) = new;
102 return new;
103}
104
105cpp_buffer *
106cpp_pop_buffer (pfile)
107 cpp_reader *pfile;
108{
109 cpp_buffer *buf = CPP_BUFFER (pfile);
110 if (ACTIVE_MARK_P (pfile))
111 cpp_ice (pfile, "mark active in cpp_pop_buffer");
112 (*buf->cleanup) (buf, pfile);
113 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
114 free (buf);
115 pfile->buffer_stack_depth--;
116 return CPP_BUFFER (pfile);
117}
118
119/* Scan until CPP_BUFFER (PFILE) is exhausted into PFILE->token_buffer.
120 Pop the buffer when done. */
121
122void
123cpp_scan_buffer (pfile)
124 cpp_reader *pfile;
125{
126 cpp_buffer *buffer = CPP_BUFFER (pfile);
127 enum cpp_token token;
ae79697b 128 if (CPP_OPTION (pfile, no_output))
45b966db
ZW
129 {
130 long old_written = CPP_WRITTEN (pfile);
131 /* In no-output mode, we can ignore everything but directives. */
132 for (;;)
133 {
134 if (! pfile->only_seen_white)
135 _cpp_skip_rest_of_line (pfile);
136 token = cpp_get_token (pfile);
137 if (token == CPP_EOF) /* Should not happen ... */
138 break;
139 if (token == CPP_POP && CPP_BUFFER (pfile) == buffer)
140 {
141 if (CPP_PREV_BUFFER (CPP_BUFFER (pfile)) != NULL)
142 cpp_pop_buffer (pfile);
143 break;
144 }
145 }
146 CPP_SET_WRITTEN (pfile, old_written);
147 }
148 else
149 {
150 for (;;)
151 {
152 token = cpp_get_token (pfile);
153 if (token == CPP_EOF) /* Should not happen ... */
154 break;
155 if (token == CPP_POP && CPP_BUFFER (pfile) == buffer)
156 {
157 if (CPP_PREV_BUFFER (CPP_BUFFER (pfile)) != NULL)
158 cpp_pop_buffer (pfile);
159 break;
160 }
161 }
162 }
163}
164
165/*
166 * Rescan a string (which may have escape marks) into pfile's buffer.
167 * Place the result in pfile->token_buffer.
168 *
169 * The input is copied before it is scanned, so it is safe to pass
170 * it something from the token_buffer that will get overwritten
171 * (because it follows CPP_WRITTEN). This is used by do_include.
172 */
173
174void
175cpp_expand_to_buffer (pfile, buf, length)
176 cpp_reader *pfile;
177 const U_CHAR *buf;
178 int length;
179{
180 register cpp_buffer *ip;
181 U_CHAR *buf1;
182 int save_no_output;
183
184 if (length < 0)
185 {
186 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
187 return;
188 }
189
190 /* Set up the input on the input stack. */
191
192 buf1 = (U_CHAR *) alloca (length + 1);
193 memcpy (buf1, buf, length);
194 buf1[length] = 0;
195
196 ip = cpp_push_buffer (pfile, buf1, length);
197 if (ip == NULL)
198 return;
199 ip->has_escapes = 1;
200
201 /* Scan the input, create the output. */
ae79697b
ZW
202 save_no_output = CPP_OPTION (pfile, no_output);
203 CPP_OPTION (pfile, no_output) = 0;
204 CPP_OPTION (pfile, no_line_commands)++;
45b966db 205 cpp_scan_buffer (pfile);
ae79697b
ZW
206 CPP_OPTION (pfile, no_line_commands)--;
207 CPP_OPTION (pfile, no_output) = save_no_output;
45b966db
ZW
208
209 CPP_NUL_TERMINATE (pfile);
210}
211
212void
213cpp_buf_line_and_col (pbuf, linep, colp)
214 register cpp_buffer *pbuf;
215 long *linep, *colp;
216{
217 if (pbuf)
218 {
219 *linep = pbuf->lineno;
220 if (colp)
221 *colp = pbuf->cur - pbuf->line_base;
222 }
223 else
224 {
225 *linep = 0;
226 if (colp)
227 *colp = 0;
228 }
229}
230
231/* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
232
233cpp_buffer *
234cpp_file_buffer (pfile)
235 cpp_reader *pfile;
236{
237 cpp_buffer *ip;
238
239 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
240 if (ip->ihash != NULL)
241 return ip;
242 return NULL;
243}
244
245/* Skip a C-style block comment. We know it's a comment, and point is
246 at the second character of the starter. */
247static void
248skip_block_comment (pfile)
249 cpp_reader *pfile;
250{
45b966db 251 long line, col;
61474454 252 const U_CHAR *limit, *cur;
45b966db
ZW
253
254 FORWARD(1);
255 cpp_buf_line_and_col (CPP_BUFFER (pfile), &line, &col);
61474454
NB
256 limit = CPP_BUFFER (pfile)->rlimit;
257 cur = CPP_BUFFER (pfile)->cur;
258
259 while (cur < limit)
45b966db 260 {
61474454
NB
261 char c = *cur++;
262 if (c == '\n' || c == '\r')
45b966db
ZW
263 {
264 /* \r cannot be a macro escape marker here. */
265 if (!ACTIVE_MARK_P (pfile))
61474454
NB
266 CPP_BUMP_LINE_CUR (pfile, cur);
267 }
268 else if (c == '*')
269 {
270 /* Check for teminator. */
271 if (cur < limit && *cur == '/')
272 goto out;
273
274 /* Warn about comment starter embedded in comment. */
275 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
276 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
277 cur - CPP_BUFFER (pfile)->line_base,
278 "'/*' within comment");
45b966db 279 }
45b966db 280 }
61474454
NB
281
282 cpp_error_with_line (pfile, line, col, "unterminated comment");
283 cur--;
284 out:
285 CPP_BUFFER (pfile)->cur = cur + 1;
45b966db
ZW
286}
287
288/* Skip a C++/Chill line comment. We know it's a comment, and point
289 is at the second character of the initiator. */
290static void
291skip_line_comment (pfile)
292 cpp_reader *pfile;
293{
294 FORWARD(1);
295 for (;;)
296 {
297 int c = GETC ();
298
299 /* We don't have to worry about EOF in here. */
300 if (c == '\n')
301 {
302 /* Don't consider final '\n' to be part of comment. */
303 FORWARD(-1);
304 return;
305 }
306 else if (c == '\r')
307 {
308 /* \r cannot be a macro escape marker here. */
309 if (!ACTIVE_MARK_P (pfile))
310 CPP_BUMP_LINE (pfile);
ae79697b 311 if (CPP_OPTION (pfile, warn_comments))
45b966db
ZW
312 cpp_warning (pfile, "backslash-newline within line comment");
313 }
314 }
315}
316
317/* Skip a comment - C, C++, or Chill style. M is the first character
318 of the comment marker. If this really is a comment, skip to its
319 end and return ' '. If this is not a comment, return M (which will
320 be '/' or '-'). */
321
322static int
323skip_comment (pfile, m)
324 cpp_reader *pfile;
325 int m;
326{
327 if (m == '/' && PEEKC() == '*')
328 {
329 skip_block_comment (pfile);
330 return ' ';
331 }
332 else if (m == '/' && PEEKC() == '/')
333 {
334 if (CPP_BUFFER (pfile)->system_header_p)
335 {
336 /* We silently allow C++ comments in system headers, irrespective
337 of conformance mode, because lots of busted systems do that
338 and trying to clean it up in fixincludes is a nightmare. */
339 skip_line_comment (pfile);
340 return ' ';
341 }
ae79697b 342 else if (CPP_OPTION (pfile, cplusplus_comments))
45b966db 343 {
ae79697b 344 if (CPP_OPTION (pfile, c89)
45b966db
ZW
345 && CPP_PEDANTIC (pfile)
346 && ! CPP_BUFFER (pfile)->warned_cplusplus_comments)
347 {
348 cpp_pedwarn (pfile,
349 "C++ style comments are not allowed in ISO C89");
350 cpp_pedwarn (pfile,
351 "(this will be reported only once per input file)");
352 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
353 }
354 skip_line_comment (pfile);
355 return ' ';
356 }
357 else
358 return m;
359 }
360 else if (m == '-' && PEEKC() == '-'
ae79697b 361 && CPP_OPTION (pfile, chill))
45b966db
ZW
362 {
363 skip_line_comment (pfile);
364 return ' ';
365 }
366 else
367 return m;
368}
369
370/* Identical to skip_comment except that it copies the comment into the
371 token_buffer. This is used if !discard_comments. */
372static int
373copy_comment (pfile, m)
374 cpp_reader *pfile;
375 int m;
376{
377 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
378 const U_CHAR *limit;
379
380 if (skip_comment (pfile, m) == m)
381 return m;
382
383 limit = CPP_BUFFER (pfile)->cur;
384 CPP_RESERVE (pfile, limit - start + 2);
385 CPP_PUTC_Q (pfile, m);
386 for (; start <= limit; start++)
387 if (*start != '\r')
388 CPP_PUTC_Q (pfile, *start);
389
390 return ' ';
391}
392
64aaf407
NB
393static void
394null_warning (pfile, count)
395 cpp_reader *pfile;
396 unsigned int count;
397{
398 if (count == 1)
399 cpp_warning (pfile, "embedded null character ignored");
400 else
401 cpp_warning (pfile, "embedded null characters ignored");
402}
403
45b966db
ZW
404/* Skip whitespace \-newline and comments. Does not macro-expand. */
405
406void
407_cpp_skip_hspace (pfile)
408 cpp_reader *pfile;
409{
64aaf407 410 unsigned int null_count = 0;
45b966db 411 int c;
64aaf407 412
45b966db
ZW
413 while (1)
414 {
415 c = GETC();
416 if (c == EOF)
64aaf407 417 goto out;
45b966db
ZW
418 else if (is_hspace(c))
419 {
420 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
421 cpp_pedwarn (pfile, "%s in preprocessing directive",
422 c == '\f' ? "formfeed" : "vertical tab");
64aaf407
NB
423 else if (c == '\0')
424 null_count++;
45b966db
ZW
425 }
426 else if (c == '\r')
427 {
428 /* \r is a backslash-newline marker if !has_escapes, and
429 a deletable-whitespace or no-reexpansion marker otherwise. */
430 if (CPP_BUFFER (pfile)->has_escapes)
431 {
432 if (PEEKC() == ' ')
433 FORWARD(1);
434 else
435 break;
436 }
437 else
438 CPP_BUMP_LINE (pfile);
439 }
440 else if (c == '/' || c == '-')
441 {
442 c = skip_comment (pfile, c);
443 if (c != ' ')
444 break;
445 }
446 else
447 break;
448 }
449 FORWARD(-1);
64aaf407
NB
450 out:
451 if (null_count)
452 null_warning (pfile, null_count);
45b966db
ZW
453}
454
455/* Read and discard the rest of the current line. */
456
457void
458_cpp_skip_rest_of_line (pfile)
459 cpp_reader *pfile;
460{
461 for (;;)
462 {
463 int c = GETC();
464 switch (c)
465 {
466 case '\n':
467 FORWARD(-1);
468 case EOF:
469 return;
470
471 case '\r':
472 if (! CPP_BUFFER (pfile)->has_escapes)
473 CPP_BUMP_LINE (pfile);
474 break;
475
476 case '\'':
477 case '\"':
478 skip_string (pfile, c);
479 break;
480
481 case '/':
482 case '-':
483 skip_comment (pfile, c);
484 break;
485
486 case '\f':
487 case '\v':
488 if (CPP_PEDANTIC (pfile))
489 cpp_pedwarn (pfile, "%s in preprocessing directive",
490 c == '\f' ? "formfeed" : "vertical tab");
491 break;
492
493 }
494 }
495}
496
497/* Parse an identifier starting with C. */
498
499void
500_cpp_parse_name (pfile, c)
501 cpp_reader *pfile;
502 int c;
503{
504 for (;;)
505 {
506 if (! is_idchar(c))
507 {
508 FORWARD (-1);
509 break;
510 }
511
512 if (c == '$' && CPP_PEDANTIC (pfile))
513 cpp_pedwarn (pfile, "`$' in identifier");
514
515 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
516 CPP_PUTC_Q (pfile, c);
517 c = GETC();
518 if (c == EOF)
519 break;
520 }
521 CPP_NUL_TERMINATE_Q (pfile);
522 return;
523}
524
525/* Parse and skip over a string starting with C. A single quoted
526 string is treated like a double -- some programs (e.g., troff) are
527 perverse this way. (However, a single quoted string is not allowed
528 to extend over multiple lines.) */
529static void
530skip_string (pfile, c)
531 cpp_reader *pfile;
532 int c;
533{
534 long start_line, start_column;
64aaf407 535 unsigned int null_count = 0;
45b966db 536
64aaf407 537 cpp_buf_line_and_col (cpp_file_buffer (pfile), &start_line, &start_column);
45b966db
ZW
538 while (1)
539 {
540 int cc = GETC();
541 switch (cc)
542 {
543 case EOF:
544 cpp_error_with_line (pfile, start_line, start_column,
545 "unterminated string or character constant");
546 if (pfile->multiline_string_line != start_line
547 && pfile->multiline_string_line != 0)
548 cpp_error_with_line (pfile,
549 pfile->multiline_string_line, -1,
550 "possible real start of unterminated constant");
551 pfile->multiline_string_line = 0;
64aaf407 552 goto out;
45b966db 553
64aaf407
NB
554 case '\0':
555 null_count++;
556 break;
557
45b966db
ZW
558 case '\n':
559 CPP_BUMP_LINE (pfile);
560 /* In Fortran and assembly language, silently terminate
561 strings of either variety at end of line. This is a
562 kludge around not knowing where comments are in these
563 languages. */
ae79697b
ZW
564 if (CPP_OPTION (pfile, lang_fortran)
565 || CPP_OPTION (pfile, lang_asm))
45b966db
ZW
566 {
567 FORWARD(-1);
64aaf407 568 goto out;
45b966db
ZW
569 }
570 /* Character constants may not extend over multiple lines.
571 In Standard C, neither may strings. We accept multiline
572 strings as an extension. */
573 if (c == '\'')
574 {
575 cpp_error_with_line (pfile, start_line, start_column,
576 "unterminated character constant");
577 FORWARD(-1);
64aaf407 578 goto out;
45b966db
ZW
579 }
580 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
581 cpp_pedwarn_with_line (pfile, start_line, start_column,
582 "string constant runs past end of line");
583 if (pfile->multiline_string_line == 0)
584 pfile->multiline_string_line = start_line;
585 break;
586
587 case '\r':
588 if (CPP_BUFFER (pfile)->has_escapes)
589 {
590 cpp_ice (pfile, "\\r escape inside string constant");
591 FORWARD(1);
592 }
593 else
594 /* Backslash newline is replaced by nothing at all. */
595 CPP_BUMP_LINE (pfile);
596 break;
597
598 case '\\':
599 FORWARD(1);
600 break;
601
602 case '\"':
603 case '\'':
604 if (cc == c)
64aaf407 605 goto out;
45b966db
ZW
606 break;
607 }
608 }
64aaf407
NB
609
610 out:
611 if (null_count == 1)
612 cpp_warning (pfile, "null character in string or character constant");
613 else if (null_count > 1)
614 cpp_warning (pfile, "null characters in string or character constant");
45b966db
ZW
615}
616
617/* Parse a string and copy it to the output. */
618
619static void
620parse_string (pfile, c)
621 cpp_reader *pfile;
622 int c;
623{
624 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
625 const U_CHAR *limit;
626
627 skip_string (pfile, c);
628
629 limit = CPP_BUFFER (pfile)->cur;
630 CPP_RESERVE (pfile, limit - start + 2);
631 CPP_PUTC_Q (pfile, c);
632 for (; start < limit; start++)
633 if (*start != '\r')
634 CPP_PUTC_Q (pfile, *start);
635}
636
637/* Read an assertion into the token buffer, converting to
638 canonical form: `#predicate(a n swe r)' The next non-whitespace
639 character to read should be the first letter of the predicate.
640 Returns 0 for syntax error, 1 for bare predicate, 2 for predicate
641 with answer (see callers for why). In case of 0, an error has been
642 printed. */
643int
644_cpp_parse_assertion (pfile)
645 cpp_reader *pfile;
646{
647 int c, dropwhite;
648 _cpp_skip_hspace (pfile);
649 c = PEEKC();
bfab56e7
ZW
650 if (c == '\n')
651 {
652 cpp_error (pfile, "assertion without predicate");
653 return 0;
654 }
655 else if (! is_idstart(c))
45b966db
ZW
656 {
657 cpp_error (pfile, "assertion predicate is not an identifier");
658 return 0;
659 }
660 CPP_PUTC(pfile, '#');
661 FORWARD(1);
662 _cpp_parse_name (pfile, c);
663
664 c = PEEKC();
665 if (c != '(')
666 {
667 if (is_hspace(c) || c == '\r')
668 _cpp_skip_hspace (pfile);
669 c = PEEKC();
670 }
671 if (c != '(')
672 return 1;
673
674 CPP_PUTC(pfile, '(');
675 FORWARD(1);
676 dropwhite = 1;
677 while ((c = GETC()) != ')')
678 {
679 if (is_space(c))
680 {
681 if (! dropwhite)
682 {
683 CPP_PUTC(pfile, ' ');
684 dropwhite = 1;
685 }
686 }
687 else if (c == '\n' || c == EOF)
688 {
689 if (c == '\n') FORWARD(-1);
690 cpp_error (pfile, "un-terminated assertion answer");
691 return 0;
692 }
693 else if (c == '\r')
694 /* \r cannot be a macro escape here. */
695 CPP_BUMP_LINE (pfile);
696 else
697 {
698 CPP_PUTC (pfile, c);
699 dropwhite = 0;
700 }
701 }
702
703 if (pfile->limit[-1] == ' ')
704 pfile->limit[-1] = ')';
705 else if (pfile->limit[-1] == '(')
706 {
707 cpp_error (pfile, "empty token sequence in assertion");
708 return 0;
709 }
710 else
711 CPP_PUTC (pfile, ')');
712
45b966db
ZW
713 return 2;
714}
715
716/* Get the next token, and add it to the text in pfile->token_buffer.
717 Return the kind of token we got. */
718
719enum cpp_token
720_cpp_lex_token (pfile)
721 cpp_reader *pfile;
722{
723 register int c, c2, c3;
724 enum cpp_token token;
45b966db
ZW
725
726 get_next:
727 c = GETC();
728 switch (c)
729 {
730 case EOF:
731 return CPP_EOF;
732
733 case '/':
734 if (PEEKC () == '=')
735 goto op2;
736
737 comment:
ae79697b 738 if (CPP_OPTION (pfile, discard_comments))
45b966db
ZW
739 c = skip_comment (pfile, c);
740 else
741 c = copy_comment (pfile, c);
742 if (c != ' ')
743 goto randomchar;
744
745 /* Comments are equivalent to spaces.
746 For -traditional, a comment is equivalent to nothing. */
ff2b53ef 747 if (!CPP_OPTION (pfile, discard_comments))
45b966db 748 return CPP_COMMENT;
ff2b53ef
ZW
749 else if (CPP_TRADITIONAL (pfile)
750 && ! is_space (PEEKC ()))
751 {
752 if (pfile->parsing_define_directive)
753 return CPP_COMMENT;
754 else
755 goto get_next;
756 }
45b966db
ZW
757 else
758 {
759 CPP_PUTC (pfile, c);
760 return CPP_HSPACE;
761 }
762
763 case '#':
764 if (pfile->parsing_if_directive)
765 {
bfab56e7
ZW
766 if (_cpp_parse_assertion (pfile))
767 return CPP_ASSERTION;
768 goto randomchar;
45b966db
ZW
769 }
770
771 if (pfile->parsing_define_directive && ! CPP_TRADITIONAL (pfile))
772 {
773 CPP_RESERVE (pfile, 3);
774 CPP_PUTC_Q (pfile, '#');
775 CPP_NUL_TERMINATE_Q (pfile);
776 if (PEEKC () != '#')
777 return CPP_STRINGIZE;
778
779 FORWARD (1);
780 CPP_PUTC_Q (pfile, '#');
781 CPP_NUL_TERMINATE_Q (pfile);
782 return CPP_TOKPASTE;
783 }
784
785 if (!pfile->only_seen_white)
786 goto randomchar;
45b966db
ZW
787 return CPP_DIRECTIVE;
788
789 case '\"':
790 case '\'':
791 parse_string (pfile, c);
45b966db
ZW
792 return c == '\'' ? CPP_CHAR : CPP_STRING;
793
794 case '$':
ae79697b 795 if (!CPP_OPTION (pfile, dollars_in_ident))
45b966db
ZW
796 goto randomchar;
797 goto letter;
798
799 case ':':
ae79697b 800 if (CPP_OPTION (pfile, cplusplus) && PEEKC () == ':')
45b966db
ZW
801 goto op2;
802 goto randomchar;
803
804 case '&':
805 case '+':
806 case '|':
807 c2 = PEEKC ();
808 if (c2 == c || c2 == '=')
809 goto op2;
810 goto randomchar;
811
812 case '*':
813 case '!':
814 case '%':
815 case '=':
816 case '^':
817 if (PEEKC () == '=')
818 goto op2;
819 goto randomchar;
820
821 case '-':
822 c2 = PEEKC ();
823 if (c2 == '-')
824 {
ae79697b 825 if (CPP_OPTION (pfile, chill))
45b966db
ZW
826 goto comment; /* Chill style comment */
827 else
828 goto op2;
829 }
830 else if (c2 == '=')
831 goto op2;
832 else if (c2 == '>')
833 {
ae79697b 834 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
45b966db
ZW
835 {
836 /* In C++, there's a ->* operator. */
837 token = CPP_OTHER;
45b966db
ZW
838 CPP_RESERVE (pfile, 4);
839 CPP_PUTC_Q (pfile, c);
840 CPP_PUTC_Q (pfile, GETC ());
841 CPP_PUTC_Q (pfile, GETC ());
842 CPP_NUL_TERMINATE_Q (pfile);
843 return token;
844 }
845 goto op2;
846 }
847 goto randomchar;
848
849 case '<':
850 if (pfile->parsing_include_directive)
851 {
852 for (;;)
853 {
854 CPP_PUTC (pfile, c);
855 if (c == '>')
856 break;
857 c = GETC ();
858 if (c == '\n' || c == EOF)
859 {
860 cpp_error (pfile,
861 "missing '>' in `#include <FILENAME>'");
862 break;
863 }
864 else if (c == '\r')
865 {
866 if (!CPP_BUFFER (pfile)->has_escapes)
867 {
868 /* Backslash newline is replaced by nothing. */
869 CPP_ADJUST_WRITTEN (pfile, -1);
870 CPP_BUMP_LINE (pfile);
871 }
872 else
873 {
874 /* We might conceivably get \r- or \r<space> in
875 here. Just delete 'em. */
876 int d = GETC();
877 if (d != '-' && d != ' ')
878 cpp_ice (pfile, "unrecognized escape \\r%c", d);
879 CPP_ADJUST_WRITTEN (pfile, -1);
880 }
881 }
882 }
883 return CPP_STRING;
884 }
885 /* else fall through */
886 case '>':
887 c2 = PEEKC ();
888 if (c2 == '=')
889 goto op2;
890 /* GNU C++ supports MIN and MAX operators <? and >?. */
ae79697b 891 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
45b966db
ZW
892 goto randomchar;
893 FORWARD(1);
894 CPP_RESERVE (pfile, 4);
895 CPP_PUTC (pfile, c);
896 CPP_PUTC (pfile, c2);
897 c3 = PEEKC ();
898 if (c3 == '=')
899 CPP_PUTC_Q (pfile, GETC ());
900 CPP_NUL_TERMINATE_Q (pfile);
45b966db
ZW
901 return CPP_OTHER;
902
903 case '.':
904 c2 = PEEKC ();
905 if (ISDIGIT(c2))
906 {
907 CPP_RESERVE(pfile, 2);
908 CPP_PUTC_Q (pfile, '.');
909 c = GETC ();
910 goto number;
911 }
912
913 /* In C++ there's a .* operator. */
ae79697b 914 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
45b966db
ZW
915 goto op2;
916
917 if (c2 == '.' && PEEKN(1) == '.')
918 {
919 CPP_RESERVE(pfile, 4);
920 CPP_PUTC_Q (pfile, '.');
921 CPP_PUTC_Q (pfile, '.');
922 CPP_PUTC_Q (pfile, '.');
923 FORWARD (2);
924 CPP_NUL_TERMINATE_Q (pfile);
45b966db
ZW
925 return CPP_3DOTS;
926 }
927 goto randomchar;
928
929 op2:
930 token = CPP_OTHER;
45b966db
ZW
931 CPP_RESERVE(pfile, 3);
932 CPP_PUTC_Q (pfile, c);
933 CPP_PUTC_Q (pfile, GETC ());
934 CPP_NUL_TERMINATE_Q (pfile);
935 return token;
936
937 case 'L':
938 c2 = PEEKC ();
939 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
940 {
941 CPP_PUTC (pfile, c);
942 c = GETC ();
943 parse_string (pfile, c);
45b966db
ZW
944 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
945 }
946 goto letter;
947
948 case '0': case '1': case '2': case '3': case '4':
949 case '5': case '6': case '7': case '8': case '9':
950 number:
951 c2 = '.';
952 for (;;)
953 {
954 CPP_RESERVE (pfile, 2);
955 CPP_PUTC_Q (pfile, c);
956 c = PEEKC ();
957 if (c == EOF)
958 break;
959 if (!is_numchar(c) && c != '.'
960 && ((c2 != 'e' && c2 != 'E'
961 && ((c2 != 'p' && c2 != 'P')
ae79697b 962 || CPP_OPTION (pfile, c89)))
45b966db
ZW
963 || (c != '+' && c != '-')))
964 break;
965 FORWARD(1);
966 c2= c;
967 }
968 CPP_NUL_TERMINATE_Q (pfile);
45b966db
ZW
969 return CPP_NUMBER;
970 case 'b': case 'c': case 'd': case 'h': case 'o':
971 case 'B': case 'C': case 'D': case 'H': case 'O':
ae79697b 972 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
45b966db 973 {
45b966db
ZW
974 CPP_RESERVE (pfile, 2);
975 CPP_PUTC_Q (pfile, c);
976 CPP_PUTC_Q (pfile, '\'');
977 FORWARD(1);
978 for (;;)
979 {
980 c = GETC();
981 if (c == EOF)
982 goto chill_number_eof;
983 if (!is_numchar(c))
984 break;
985 CPP_PUTC (pfile, c);
986 }
987 if (c == '\'')
988 {
989 CPP_RESERVE (pfile, 2);
990 CPP_PUTC_Q (pfile, c);
991 CPP_NUL_TERMINATE_Q (pfile);
992 return CPP_STRING;
993 }
994 else
995 {
996 FORWARD(-1);
997 chill_number_eof:
998 CPP_NUL_TERMINATE (pfile);
999 return CPP_NUMBER;
1000 }
1001 }
1002 else
1003 goto letter;
1004 case '_':
1005 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1006 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1007 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1008 case 'x': case 'y': case 'z':
1009 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1010 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1011 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1012 case 'Y': case 'Z':
1013 letter:
45b966db
ZW
1014 _cpp_parse_name (pfile, c);
1015 return CPP_MACRO;
1016
64aaf407
NB
1017 case ' ': case '\t': case '\v': case '\f': case '\0':
1018 {
1019 int null_count = 0;
1020
1021 for (;;)
1022 {
1023 if (c == '\0')
1024 null_count++;
1025 else
1026 CPP_PUTC (pfile, c);
1027 c = PEEKC ();
1028 if (c == EOF || !is_hspace(c))
1029 break;
1030 FORWARD(1);
1031 }
1032 if (null_count)
1033 null_warning (pfile, null_count);
1034 return CPP_HSPACE;
1035 }
45b966db
ZW
1036
1037 case '\r':
1038 if (CPP_BUFFER (pfile)->has_escapes)
1039 {
1040 c = GETC ();
1041 if (c == '-')
1042 {
1043 if (pfile->output_escapes)
1044 CPP_PUTS (pfile, "\r-", 2);
1045 _cpp_parse_name (pfile, GETC ());
1046 return CPP_NAME;
1047 }
1048 else if (c == ' ')
1049 {
ff2b53ef
ZW
1050 /* "\r " means a space, but only if necessary to prevent
1051 accidental token concatenation. */
45b966db
ZW
1052 CPP_RESERVE (pfile, 2);
1053 if (pfile->output_escapes)
1054 CPP_PUTC_Q (pfile, '\r');
1055 CPP_PUTC_Q (pfile, c);
1056 return CPP_HSPACE;
1057 }
1058 else
1059 {
1060 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1061 goto get_next;
1062 }
1063 }
1064 else
1065 {
1066 /* Backslash newline is ignored. */
1067 CPP_BUMP_LINE (pfile);
1068 goto get_next;
1069 }
1070
1071 case '\n':
1072 CPP_PUTC (pfile, c);
45b966db
ZW
1073 return CPP_VSPACE;
1074
1075 case '(': token = CPP_LPAREN; goto char1;
1076 case ')': token = CPP_RPAREN; goto char1;
1077 case '{': token = CPP_LBRACE; goto char1;
1078 case '}': token = CPP_RBRACE; goto char1;
1079 case ',': token = CPP_COMMA; goto char1;
1080 case ';': token = CPP_SEMICOLON; goto char1;
1081
1082 randomchar:
1083 default:
1084 token = CPP_OTHER;
1085 char1:
45b966db
ZW
1086 CPP_PUTC (pfile, c);
1087 return token;
1088 }
1089}
1090
1091/* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1092 Caller is expected to have checked no_macro_expand. */
1093static int
1094maybe_macroexpand (pfile, written)
1095 cpp_reader *pfile;
1096 long written;
1097{
1098 U_CHAR *macro = pfile->token_buffer + written;
1099 size_t len = CPP_WRITTEN (pfile) - written;
1100 HASHNODE *hp = _cpp_lookup (pfile, macro, len);
1101
1102 if (!hp)
1103 return 0;
1104 if (hp->type == T_DISABLED)
1105 {
1106 if (pfile->output_escapes)
1107 {
1108 /* Insert a no-reexpand marker before IDENT. */
1109 CPP_RESERVE (pfile, 2);
1110 CPP_ADJUST_WRITTEN (pfile, 2);
1111 macro = pfile->token_buffer + written;
1112
1113 memmove (macro + 2, macro, len);
1114 macro[0] = '\r';
1115 macro[1] = '-';
1116 }
1117 return 0;
1118 }
ff2b53ef
ZW
1119 if (hp->type == T_EMPTY)
1120 {
1121 /* Special case optimization: macro expands to nothing. */
1122 CPP_SET_WRITTEN (pfile, written);
1123 CPP_PUTC_Q (pfile, ' ');
1124 return 1;
1125 }
45b966db
ZW
1126
1127 /* If macro wants an arglist, verify that a '(' follows. */
1128 if (hp->type == T_MACRO && hp->value.defn->nargs >= 0)
1129 {
1130 int macbuf_whitespace = 0;
1131 int c;
1132
1133 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1134 {
1135 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1136 for (;;)
1137 {
1138 _cpp_skip_hspace (pfile);
1139 c = PEEKC ();
1140 if (c == '\n')
1141 FORWARD(1);
1142 else
1143 break;
1144 }
1145 if (point != CPP_BUFFER (pfile)->cur)
1146 macbuf_whitespace = 1;
1147 if (c == '(')
1148 goto is_macro_call;
1149 else if (c != EOF)
1150 goto not_macro_call;
1151 cpp_pop_buffer (pfile);
1152 }
1153
1154 CPP_SET_MARK (pfile);
1155 for (;;)
1156 {
1157 _cpp_skip_hspace (pfile);
1158 c = PEEKC ();
1159 if (c == '\n')
1160 FORWARD(1);
1161 else
1162 break;
1163 }
1164 CPP_GOTO_MARK (pfile);
1165
1166 if (c != '(')
1167 {
1168 not_macro_call:
1169 if (macbuf_whitespace)
1170 CPP_PUTC (pfile, ' ');
1171 return 0;
1172 }
1173 }
1174
1175 is_macro_call:
1176 /* This is now known to be a macro call.
1177 Expand the macro, reading arguments as needed,
1178 and push the expansion on the input stack. */
1179 _cpp_macroexpand (pfile, hp);
1180 CPP_SET_WRITTEN (pfile, written);
1181 return 1;
1182}
1183
1184enum cpp_token
1185cpp_get_token (pfile)
1186 cpp_reader *pfile;
1187{
1188 enum cpp_token token;
1189 long written = CPP_WRITTEN (pfile);
1190
1191 get_next:
1192 token = _cpp_lex_token (pfile);
1193
1194 switch (token)
1195 {
1196 default:
ff2b53ef
ZW
1197 pfile->potential_control_macro = 0;
1198 pfile->only_seen_white = 0;
1199 return token;
1200
1201 case CPP_VSPACE:
1202 if (pfile->only_seen_white == 0)
1203 pfile->only_seen_white = 1;
1204 CPP_BUMP_LINE (pfile);
1205 if (! CPP_OPTION (pfile, no_line_commands))
1206 {
1207 pfile->lineno++;
1208 if (CPP_BUFFER (pfile)->lineno != pfile->lineno)
1209 _cpp_output_line_command (pfile, same_file);
1210 }
1211 return token;
1212
1213 case CPP_HSPACE:
1214 case CPP_COMMENT:
45b966db
ZW
1215 return token;
1216
1217 case CPP_DIRECTIVE:
ff2b53ef 1218 pfile->potential_control_macro = 0;
45b966db
ZW
1219 if (_cpp_handle_directive (pfile))
1220 return CPP_DIRECTIVE;
1221 pfile->only_seen_white = 0;
1222 CPP_PUTC (pfile, '#');
1223 return CPP_OTHER;
1224
1225 case CPP_MACRO:
ff2b53ef
ZW
1226 pfile->potential_control_macro = 0;
1227 pfile->only_seen_white = 0;
45b966db
ZW
1228 if (! pfile->no_macro_expand
1229 && maybe_macroexpand (pfile, written))
1230 goto get_next;
1231 return CPP_NAME;
1232
1233 case CPP_EOF:
1234 if (CPP_BUFFER (pfile)->manual_pop)
1235 /* If we've been reading from redirected input, the
1236 frontend will pop the buffer. */
1237 return CPP_EOF;
1238 else if (CPP_BUFFER (pfile)->seen_eof)
1239 {
1240 if (CPP_PREV_BUFFER (CPP_BUFFER (pfile)) == NULL)
1241 return CPP_EOF;
1242
1243 cpp_pop_buffer (pfile);
1244 goto get_next;
1245 }
1246 else
1247 {
1248 _cpp_handle_eof (pfile);
1249 return CPP_POP;
1250 }
1251 }
1252}
1253
1254/* Like cpp_get_token, but skip spaces and comments. */
1255
1256enum cpp_token
1257cpp_get_non_space_token (pfile)
1258 cpp_reader *pfile;
1259{
1260 int old_written = CPP_WRITTEN (pfile);
1261 for (;;)
1262 {
1263 enum cpp_token token = cpp_get_token (pfile);
ff2b53ef 1264 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
45b966db
ZW
1265 return token;
1266 CPP_SET_WRITTEN (pfile, old_written);
1267 }
1268}
1269
ff2b53ef
ZW
1270/* Like cpp_get_token, except that it does not execute directives,
1271 does not consume vertical space, and automatically pops off macro
1272 buffers.
45b966db 1273
ff2b53ef
ZW
1274 XXX This function will exist only till collect_expansion doesn't
1275 need to see whitespace anymore, then it'll be merged with
1276 _cpp_get_directive_token (below). */
45b966db 1277enum cpp_token
ff2b53ef 1278_cpp_get_define_token (pfile)
45b966db
ZW
1279 cpp_reader *pfile;
1280{
ff2b53ef 1281 long old_written;
45b966db
ZW
1282 enum cpp_token token;
1283
ff2b53ef
ZW
1284 get_next:
1285 old_written = CPP_WRITTEN (pfile);
1286 token = _cpp_lex_token (pfile);
1287 switch (token)
45b966db 1288 {
ff2b53ef
ZW
1289 default:
1290 return token;
45b966db 1291
ff2b53ef
ZW
1292 case CPP_VSPACE:
1293 /* Put it back and return VSPACE. */
1294 FORWARD(-1);
1295 CPP_ADJUST_WRITTEN (pfile, -1);
1296 return CPP_VSPACE;
45b966db 1297
ff2b53ef
ZW
1298 case CPP_HSPACE:
1299 if (CPP_PEDANTIC (pfile))
45b966db 1300 {
ff2b53ef
ZW
1301 U_CHAR *p, *limit;
1302 p = pfile->token_buffer + old_written;
1303 limit = CPP_PWRITTEN (pfile);
1304 while (p < limit)
1305 {
1306 if (*p == '\v' || *p == '\f')
1307 cpp_pedwarn (pfile, "%s in preprocessing directive",
1308 *p == '\f' ? "formfeed" : "vertical tab");
1309 p++;
1310 }
45b966db 1311 }
ff2b53ef 1312 return CPP_HSPACE;
45b966db 1313
ff2b53ef
ZW
1314 case CPP_DIRECTIVE:
1315 /* Don't execute the directive, but don't smash it to OTHER either. */
1316 CPP_PUTC (pfile, '#');
1317 return CPP_DIRECTIVE;
1318
1319 case CPP_MACRO:
1320 if (! pfile->no_macro_expand
1321 && maybe_macroexpand (pfile, old_written))
1322 goto get_next;
1323 return CPP_NAME;
45b966db 1324
ff2b53ef
ZW
1325 case CPP_EOF:
1326 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
45b966db 1327 {
ff2b53ef
ZW
1328 cpp_pop_buffer (pfile);
1329 goto get_next;
45b966db 1330 }
ff2b53ef
ZW
1331 else
1332 /* This can happen for files that don't end with a newline,
1333 and for cpp_define and friends. Pretend they do, so
1334 callers don't have to deal. A warning will be issued by
1335 someone else, if necessary. */
1336 return CPP_VSPACE;
1337 }
1338}
1339
1340/* Just like _cpp_get_define_token except that it discards horizontal
1341 whitespace. */
45b966db 1342
ff2b53ef
ZW
1343enum cpp_token
1344_cpp_get_directive_token (pfile)
1345 cpp_reader *pfile;
1346{
1347 int old_written = CPP_WRITTEN (pfile);
1348 for (;;)
1349 {
1350 enum cpp_token token = _cpp_get_define_token (pfile);
1351 if (token != CPP_COMMENT && token != CPP_HSPACE)
1352 return token;
1353 CPP_SET_WRITTEN (pfile, old_written);
45b966db
ZW
1354 }
1355}
1356
1357/* Determine the current line and column. Used only by read_and_prescan. */
1358static U_CHAR *
1359find_position (start, limit, linep)
1360 U_CHAR *start;
1361 U_CHAR *limit;
1362 unsigned long *linep;
1363{
1364 unsigned long line = *linep;
1365 U_CHAR *lbase = start;
1366 while (start < limit)
1367 {
1368 U_CHAR ch = *start++;
1369 if (ch == '\n' || ch == '\r')
1370 {
1371 line++;
1372 lbase = start;
1373 }
1374 }
1375 *linep = line;
1376 return lbase;
1377}
1378
2a87fbe8
ZW
/* Character table used by _cpp_read_and_prescan.  When designated
   initializers are available the table is constant data and
   init_chartab expands to nothing; otherwise the table is writable
   and init_chartab (called from _cpp_init_input_buffer) fills it in
   at run time.  */

#ifndef UCHAR_MAX
#define UCHAR_MAX 255   /* assume 8-bit bytes */
#endif

#if (GCC_VERSION >= 2007) || (__STDC_VERSION__ >= 199901L)
#define init_chartab() /* nothing */
#define CHARTAB static const unsigned char chartab[UCHAR_MAX + 1] = {
#define END };
#define s(p, v) [p] = v,
#else
#define CHARTAB static unsigned char chartab[UCHAR_MAX + 1] = { 0 }; \
  static void init_chartab PARAMS ((void)) { \
    unsigned char *tab = chartab;
#define END }
#define s(p, v) tab[p] = v;
#endif

/* Codes for the characters that cannot be handled in the inner copy
   loop.  The table additionally maps the third character of each
   trigraph to its replacement character.  */
#define SPECCASE_CR        1
#define SPECCASE_BACKSLASH 2
#define SPECCASE_QUESTION  3

CHARTAB
  /* Characters that need the slow path.  */
  s('\r', SPECCASE_CR)
  s('\\', SPECCASE_BACKSLASH)
  s('?',  SPECCASE_QUESTION)

  /* Trigraph third character -> replacement.  */
  s('=',  '#')
  s(')',  ']')
  s('!',  '|')
  s('(',  '[')
  s('\'', '^')
  s('>',  '}')
  s('/',  '\\')
  s('<',  '{')
  s('-',  '~')
END

#undef CHARTAB
#undef END
#undef s

/* NORMAL: character C needs no special handling, i.e. its table entry
   is either zero or a trigraph replacement rather than a SPECCASE_
   code.  NONTRI: table value C is not a trigraph replacement.  */
#define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
#define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1423
45b966db
ZW
1424/* Read the entire contents of file DESC into buffer BUF. LEN is how
1425 much memory to allocate initially; more will be allocated if
1426 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1427 canonical form (\n). If enabled, convert and/or warn about
1428 trigraphs. Convert backslash-newline to a one-character escape
1429 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1430 token). If there is no newline at the end of the file, add one and
1431 warn. Returns -1 on failure, or the actual length of the data to
1432 be scanned.
1433
1434 This function does a lot of work, and can be a serious performance
1435 bottleneck. It has been tuned heavily; make sure you understand it
1436 before hacking. The common case - no trigraphs, Unix style line
1437 breaks, backslash-newline set off by whitespace, newline at EOF -
1438 has been optimized at the expense of the others. The performance
1439 penalty for DOS style line breaks (\r\n) is about 15%.
1440
1441 Warnings lose particularly heavily since we have to determine the
1442 line number, which involves scanning from the beginning of the file
1443 or from the last warning. The penalty for the absence of a newline
1444 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1445
1446 If your file has more than one kind of end-of-line marker, you
04e3ec78
NB
1447 will get messed-up line numbering.
1448
1449 So that the cases of the switch statement do not have to concern
1450 themselves with the complications of reading beyond the end of the
1451 buffer, the buffer is guaranteed to have at least 3 characters in
1452 it (or however many are left in the file, if less) on entry to the
1453 switch. This is enough to handle trigraphs and the "\\\n\r" and
1454 "\\\r\n" cases.
1455
1456 The end of the buffer is marked by a '\\', which, being a special
1457 character, guarantees we will exit the fast-scan loops and perform
1458 a refill. */
46d07497 1459
45b966db
ZW
1460long
1461_cpp_read_and_prescan (pfile, fp, desc, len)
1462 cpp_reader *pfile;
1463 cpp_buffer *fp;
1464 int desc;
1465 size_t len;
1466{
1467 U_CHAR *buf = (U_CHAR *) xmalloc (len);
1468 U_CHAR *ip, *op, *line_base;
1469 U_CHAR *ibase;
45b966db
ZW
1470 unsigned long line;
1471 unsigned int deferred_newlines;
45b966db 1472 size_t offset;
04e3ec78 1473 int count = 0;
45b966db
ZW
1474
1475 offset = 0;
04e3ec78 1476 deferred_newlines = 0;
45b966db
ZW
1477 op = buf;
1478 line_base = buf;
1479 line = 1;
04e3ec78
NB
1480 ibase = pfile->input_buffer + 3;
1481 ip = ibase;
1482 ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
45b966db
ZW
1483
1484 for (;;)
1485 {
04e3ec78
NB
1486 U_CHAR *near_buff_end;
1487
1488 /* Copy previous char plus unprocessed (at most 2) chars
1489 to beginning of buffer, refill it with another
1490 read(), and continue processing */
1491 memcpy(ip - count - 1, ip - 1, 3);
1492 ip -= count;
45b966db 1493
04e3ec78 1494 count = read (desc, ibase, pfile->input_buffer_len);
45b966db
ZW
1495 if (count < 0)
1496 goto error;
04e3ec78
NB
1497
1498 ibase[count] = '\\'; /* Marks end of buffer */
1499 if (count)
45b966db 1500 {
04e3ec78
NB
1501 near_buff_end = pfile->input_buffer + count;
1502 offset += count;
45b966db 1503 if (offset > len)
04e3ec78
NB
1504 {
1505 size_t delta_op;
1506 size_t delta_line_base;
1507 len *= 2;
1508 if (offset > len)
1509 /* len overflowed.
1510 This could happen if the file is larger than half the
1511 maximum address space of the machine. */
1512 goto too_big;
1513
1514 delta_op = op - buf;
1515 delta_line_base = line_base - buf;
1516 buf = (U_CHAR *) xrealloc (buf, len);
1517 op = buf + delta_op;
1518 line_base = buf + delta_line_base;
1519 }
1520 }
1521 else
1522 {
1523 if (ip == ibase)
1524 break;
1525 /* Allow normal processing of the (at most 2) remaining
1526 characters. The end-of-buffer marker is still present
1527 and prevents false matches within the switch. */
1528 near_buff_end = ibase - 1;
45b966db
ZW
1529 }
1530
1531 for (;;)
1532 {
04e3ec78 1533 unsigned int span;
45b966db 1534
04e3ec78 1535 /* Deal with \-newline, potentially in the middle of a token. */
45b966db
ZW
1536 if (deferred_newlines)
1537 {
2a87fbe8 1538 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
04e3ec78
NB
1539 {
1540 /* Previous was not white space. Skip to white
1541 space, if we can, before outputting the \r's */
1542 span = 0;
1543 while (ip[span] != ' '
1544 && ip[span] != '\t'
1545 && ip[span] != '\n'
2a87fbe8 1546 && NORMAL(ip[span]))
04e3ec78
NB
1547 span++;
1548 memcpy (op, ip, span);
1549 op += span;
1550 ip += span;
2a87fbe8 1551 if (! NORMAL(ip[0]))
04e3ec78
NB
1552 goto do_speccase;
1553 }
1554 while (deferred_newlines)
1555 deferred_newlines--, *op++ = '\r';
45b966db
ZW
1556 }
1557
1558 /* Copy as much as we can without special treatment. */
04e3ec78 1559 span = 0;
2a87fbe8 1560 while (NORMAL (ip[span])) span++;
45b966db
ZW
1561 memcpy (op, ip, span);
1562 op += span;
1563 ip += span;
1564
04e3ec78
NB
1565 do_speccase:
1566 if (ip > near_buff_end) /* Do we have enough chars? */
1567 break;
2a87fbe8 1568 switch (chartab[*ip++])
45b966db 1569 {
45b966db 1570 case SPECCASE_CR: /* \r */
04e3ec78 1571 if (ip[-2] != '\n')
45b966db 1572 {
04e3ec78
NB
1573 if (*ip == '\n')
1574 ip++;
1575 *op++ = '\n';
45b966db 1576 }
45b966db
ZW
1577 break;
1578
1579 case SPECCASE_BACKSLASH: /* \ */
04e3ec78 1580 if (*ip == '\n')
45b966db 1581 {
04e3ec78 1582 deferred_newlines++;
45b966db
ZW
1583 ip++;
1584 if (*ip == '\r') ip++;
45b966db
ZW
1585 }
1586 else if (*ip == '\r')
1587 {
04e3ec78 1588 deferred_newlines++;
45b966db
ZW
1589 ip++;
1590 if (*ip == '\n') ip++;
45b966db
ZW
1591 }
1592 else
1593 *op++ = '\\';
04e3ec78 1594 break;
45b966db
ZW
1595
1596 case SPECCASE_QUESTION: /* ? */
1597 {
1598 unsigned int d, t;
04e3ec78
NB
1599
1600 *op++ = '?'; /* Normal non-trigraph case */
1601 if (ip[0] != '?')
1602 break;
1603
45b966db 1604 d = ip[1];
2a87fbe8
ZW
1605 t = chartab[d];
1606 if (NONTRI (t))
04e3ec78 1607 break;
45b966db 1608
ae79697b 1609 if (CPP_OPTION (pfile, warn_trigraphs))
45b966db
ZW
1610 {
1611 unsigned long col;
1612 line_base = find_position (line_base, op, &line);
1613 col = op - line_base + 1;
ae79697b 1614 if (CPP_OPTION (pfile, trigraphs))
45b966db 1615 cpp_warning_with_line (pfile, line, col,
04e3ec78 1616 "trigraph ??%c converted to %c", d, t);
45b966db
ZW
1617 else
1618 cpp_warning_with_line (pfile, line, col,
04e3ec78 1619 "trigraph ??%c ignored", d);
45b966db 1620 }
04e3ec78
NB
1621
1622 ip += 2;
ae79697b 1623 if (CPP_OPTION (pfile, trigraphs))
45b966db 1624 {
04e3ec78 1625 op[-1] = t; /* Overwrite '?' */
45b966db 1626 if (t == '\\')
04e3ec78
NB
1627 {
1628 op--;
1629 *--ip = '\\';
1630 goto do_speccase; /* May need buffer refill */
1631 }
45b966db
ZW
1632 }
1633 else
1634 {
45b966db
ZW
1635 *op++ = '?';
1636 *op++ = d;
1637 }
1638 }
04e3ec78 1639 break;
45b966db
ZW
1640 }
1641 }
1642 }
1643
1644 if (offset == 0)
1645 return 0;
1646
45b966db
ZW
1647 if (op[-1] != '\n')
1648 {
1649 unsigned long col;
1650 line_base = find_position (line_base, op, &line);
1651 col = op - line_base + 1;
1652 cpp_warning_with_line (pfile, line, col, "no newline at end of file\n");
1653 if (offset + 1 > len)
1654 {
1655 len += 1;
1656 if (offset + 1 > len)
1657 goto too_big;
1658 buf = (U_CHAR *) xrealloc (buf, len);
1659 op = buf + offset;
1660 }
1661 *op++ = '\n';
1662 }
1663
1664 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
1665 return op - buf;
1666
1667 too_big:
1668 cpp_error (pfile, "file is too large (>%lu bytes)\n", (unsigned long)offset);
1669 free (buf);
1670 return -1;
1671
1672 error:
1673 cpp_error_from_errno (pfile, fp->ihash->name);
1674 free (buf);
1675 return -1;
1676}
1677
2a87fbe8
ZW
1678/* Allocate pfile->input_buffer, and initialize chartab[]
1679 if it hasn't happened already. */
46d07497 1680
45b966db
ZW
1681void
1682_cpp_init_input_buffer (pfile)
1683 cpp_reader *pfile;
1684{
1685 U_CHAR *tmp;
1686
2a87fbe8 1687 init_chartab ();
04e3ec78 1688
45b966db
ZW
1689 /* Determine the appropriate size for the input buffer. Normal C
1690 source files are smaller than eight K. */
04e3ec78
NB
1691 /* 8Kbytes of buffer proper, 1 to detect running off the end without
1692 address arithmetic all the time, and 3 for pushback during buffer
1693 refill, in case there's a potential trigraph or end-of-line
1694 digraph at the end of a block. */
45b966db 1695
04e3ec78 1696 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
45b966db
ZW
1697 pfile->input_buffer = tmp;
1698 pfile->input_buffer_len = 8192;
1699}
This page took 0.234477 seconds and 5 git commands to generate.