]>
Commit | Line | Data |
---|---|---|
45b966db ZW |
1 | /* CPP Library - lexical analysis. |
2 | Copyright (C) 2000 Free Software Foundation, Inc. | |
3 | Contributed by Per Bothner, 1994-95. | |
4 | Based on CCCP program by Paul Rubin, June 1986 | |
5 | Adapted to ANSI C, Richard Stallman, Jan 1987 | |
6 | Broken out to separate file, Zack Weinberg, Mar 2000 | |
7 | ||
8 | This program is free software; you can redistribute it and/or modify it | |
9 | under the terms of the GNU General Public License as published by the | |
10 | Free Software Foundation; either version 2, or (at your option) any | |
11 | later version. | |
12 | ||
13 | This program is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with this program; if not, write to the Free Software | |
20 | Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | |
21 | ||
22 | #include "config.h" | |
23 | #include "system.h" | |
24 | #include "intl.h" | |
25 | #include "cpplib.h" | |
26 | #include "cpphash.h" | |
27 | ||
28 | #define PEEKN(N) (CPP_BUFFER (pfile)->rlimit - CPP_BUFFER (pfile)->cur >= (N) \ | |
29 | ? CPP_BUFFER (pfile)->cur[N] : EOF) /* Look N characters ahead without consuming; EOF when fewer than N remain.  NOTE(review): with `>=', exactly N remaining characters reads cur[N] == *rlimit -- confirm the buffer has a readable sentinel there. */ | |
30 | #define FORWARD(N) CPP_FORWARD (CPP_BUFFER (pfile), (N)) /* Apply CPP_FORWARD to the current buffer; used with negative N to back up. */ | |
31 | #define GETC() CPP_BUF_GET (CPP_BUFFER (pfile)) /* CPP_BUF_GET on the current buffer; callers compare the result with EOF. */ | |
32 | #define PEEKC() CPP_BUF_PEEK (CPP_BUFFER (pfile)) /* CPP_BUF_PEEK on the current buffer.  All four macros expect a variable named `pfile' in scope. */ | |
33 | ||
34 | static void skip_block_comment PARAMS ((cpp_reader *)); | |
35 | static void skip_line_comment PARAMS ((cpp_reader *)); | |
36 | static int maybe_macroexpand PARAMS ((cpp_reader *, long)); | |
37 | static int skip_comment PARAMS ((cpp_reader *, int)); | |
38 | static int copy_comment PARAMS ((cpp_reader *, int)); | |
39 | static void skip_string PARAMS ((cpp_reader *, int)); | |
40 | static void parse_string PARAMS ((cpp_reader *, int)); | |
41 | static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *)); | |
42 | static int null_cleanup PARAMS ((cpp_buffer *, cpp_reader *)); | |
43 | ||
44 | /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */ | |
45 | ||
46 | void | |
47 | _cpp_grow_token_buffer (pfile, n) | |
48 | cpp_reader *pfile; | |
49 | long n; | |
50 | { | |
51 | long old_written = CPP_WRITTEN (pfile); | |
52 | pfile->token_buffer_size = n + 2 * pfile->token_buffer_size; | |
53 | pfile->token_buffer = (U_CHAR *) | |
54 | xrealloc(pfile->token_buffer, pfile->token_buffer_size); | |
55 | CPP_SET_WRITTEN (pfile, old_written); | |
56 | } | |
57 | ||
58 | static int | |
59 | null_cleanup (pbuf, pfile) | |
60 | cpp_buffer *pbuf ATTRIBUTE_UNUSED; | |
61 | cpp_reader *pfile ATTRIBUTE_UNUSED; | |
62 | { | |
63 | return 0; | |
64 | } | |
65 | ||
66 | /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack. | |
67 | If BUFFER != NULL, then use the LENGTH characters in BUFFER | |
68 | as the new input buffer. | |
69 | Return the new buffer, or NULL on failure. */ | |
70 | ||
71 | cpp_buffer * | |
72 | cpp_push_buffer (pfile, buffer, length) | |
73 | cpp_reader *pfile; | |
74 | const U_CHAR *buffer; | |
75 | long length; | |
76 | { | |
77 | cpp_buffer *buf = CPP_BUFFER (pfile); | |
78 | cpp_buffer *new; | |
79 | if (++pfile->buffer_stack_depth == CPP_STACK_MAX) | |
80 | { | |
81 | cpp_fatal (pfile, "macro or `#include' recursion too deep"); | |
82 | return NULL; | |
83 | } | |
84 | ||
85 | new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer)); | |
86 | ||
87 | new->if_stack = pfile->if_stack; | |
88 | new->cleanup = null_cleanup; | |
89 | new->buf = new->cur = buffer; | |
90 | new->alimit = new->rlimit = buffer + length; | |
91 | new->prev = buf; | |
92 | new->mark = -1; | |
93 | new->line_base = NULL; | |
94 | ||
95 | CPP_BUFFER (pfile) = new; | |
96 | return new; | |
97 | } | |
98 | ||
99 | cpp_buffer * | |
100 | cpp_pop_buffer (pfile) | |
101 | cpp_reader *pfile; | |
102 | { | |
103 | cpp_buffer *buf = CPP_BUFFER (pfile); | |
104 | if (ACTIVE_MARK_P (pfile)) | |
105 | cpp_ice (pfile, "mark active in cpp_pop_buffer"); | |
106 | (*buf->cleanup) (buf, pfile); | |
107 | CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf); | |
108 | free (buf); | |
109 | pfile->buffer_stack_depth--; | |
110 | return CPP_BUFFER (pfile); | |
111 | } | |
112 | ||
113 | /* Scan until CPP_BUFFER (PFILE) is exhausted into PFILE->token_buffer. | |
114 | Pop the buffer when done. */ | |
115 | ||
116 | void | |
117 | cpp_scan_buffer (pfile) | |
118 | cpp_reader *pfile; | |
119 | { | |
120 | cpp_buffer *buffer = CPP_BUFFER (pfile); | |
121 | enum cpp_token token; | |
ae79697b | 122 | if (CPP_OPTION (pfile, no_output)) |
45b966db ZW |
123 | { |
124 | long old_written = CPP_WRITTEN (pfile); | |
125 | /* In no-output mode, we can ignore everything but directives. */ | |
126 | for (;;) | |
127 | { | |
128 | if (! pfile->only_seen_white) | |
129 | _cpp_skip_rest_of_line (pfile); | |
130 | token = cpp_get_token (pfile); | |
131 | if (token == CPP_EOF) /* Should not happen ... */ | |
132 | break; | |
133 | if (token == CPP_POP && CPP_BUFFER (pfile) == buffer) | |
134 | { | |
135 | if (CPP_PREV_BUFFER (CPP_BUFFER (pfile)) != NULL) | |
136 | cpp_pop_buffer (pfile); | |
137 | break; | |
138 | } | |
139 | } | |
140 | CPP_SET_WRITTEN (pfile, old_written); | |
141 | } | |
142 | else | |
143 | { | |
144 | for (;;) | |
145 | { | |
146 | token = cpp_get_token (pfile); | |
147 | if (token == CPP_EOF) /* Should not happen ... */ | |
148 | break; | |
149 | if (token == CPP_POP && CPP_BUFFER (pfile) == buffer) | |
150 | { | |
151 | if (CPP_PREV_BUFFER (CPP_BUFFER (pfile)) != NULL) | |
152 | cpp_pop_buffer (pfile); | |
153 | break; | |
154 | } | |
155 | } | |
156 | } | |
157 | } | |
158 | ||
159 | /* | |
160 | * Rescan a string (which may have escape marks) into pfile's buffer. | |
161 | * Place the result in pfile->token_buffer. | |
162 | * | |
163 | * The input is copied before it is scanned, so it is safe to pass | |
164 | * it something from the token_buffer that will get overwritten | |
165 | * (because it follows CPP_WRITTEN). This is used by do_include. | |
166 | */ | |
167 | ||
168 | void | |
169 | cpp_expand_to_buffer (pfile, buf, length) | |
170 | cpp_reader *pfile; | |
171 | const U_CHAR *buf; | |
172 | int length; | |
173 | { | |
174 | register cpp_buffer *ip; | |
175 | U_CHAR *buf1; | |
176 | int save_no_output; | |
177 | ||
178 | if (length < 0) | |
179 | { | |
180 | cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer"); | |
181 | return; | |
182 | } | |
183 | ||
184 | /* Set up the input on the input stack. */ | |
185 | ||
186 | buf1 = (U_CHAR *) alloca (length + 1); | |
187 | memcpy (buf1, buf, length); | |
188 | buf1[length] = 0; | |
189 | ||
190 | ip = cpp_push_buffer (pfile, buf1, length); | |
191 | if (ip == NULL) | |
192 | return; | |
193 | ip->has_escapes = 1; | |
194 | ||
195 | /* Scan the input, create the output. */ | |
ae79697b ZW |
196 | save_no_output = CPP_OPTION (pfile, no_output); |
197 | CPP_OPTION (pfile, no_output) = 0; | |
198 | CPP_OPTION (pfile, no_line_commands)++; | |
45b966db | 199 | cpp_scan_buffer (pfile); |
ae79697b ZW |
200 | CPP_OPTION (pfile, no_line_commands)--; |
201 | CPP_OPTION (pfile, no_output) = save_no_output; | |
45b966db ZW |
202 | |
203 | CPP_NUL_TERMINATE (pfile); | |
204 | } | |
205 | ||
206 | void | |
207 | cpp_buf_line_and_col (pbuf, linep, colp) | |
208 | register cpp_buffer *pbuf; | |
209 | long *linep, *colp; | |
210 | { | |
211 | if (pbuf) | |
212 | { | |
213 | *linep = pbuf->lineno; | |
214 | if (colp) | |
215 | *colp = pbuf->cur - pbuf->line_base; | |
216 | } | |
217 | else | |
218 | { | |
219 | *linep = 0; | |
220 | if (colp) | |
221 | *colp = 0; | |
222 | } | |
223 | } | |
224 | ||
225 | /* Return the topmost cpp_buffer that corresponds to a file (not a macro). */ | |
226 | ||
227 | cpp_buffer * | |
228 | cpp_file_buffer (pfile) | |
229 | cpp_reader *pfile; | |
230 | { | |
231 | cpp_buffer *ip; | |
232 | ||
233 | for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip)) | |
234 | if (ip->ihash != NULL) | |
235 | return ip; | |
236 | return NULL; | |
237 | } | |
238 | ||
239 | /* Skip a C-style block comment. We know it's a comment, and point is | |
240 | at the second character of the starter. */ | |
241 | static void | |
242 | skip_block_comment (pfile) | |
243 | cpp_reader *pfile; | |
244 | { | |
245 | int c, prev_c = -1; | |
246 | long line, col; | |
247 | ||
248 | FORWARD(1); | |
249 | cpp_buf_line_and_col (CPP_BUFFER (pfile), &line, &col); | |
250 | for (;;) | |
251 | { | |
252 | c = GETC (); | |
253 | if (c == EOF) | |
254 | { | |
255 | cpp_error_with_line (pfile, line, col, "unterminated comment"); | |
256 | return; | |
257 | } | |
258 | else if (c == '\n' || c == '\r') | |
259 | { | |
260 | /* \r cannot be a macro escape marker here. */ | |
261 | if (!ACTIVE_MARK_P (pfile)) | |
262 | CPP_BUMP_LINE (pfile); | |
263 | } | |
264 | else if (c == '/' && prev_c == '*') | |
265 | return; | |
266 | else if (c == '*' && prev_c == '/' | |
ae79697b | 267 | && CPP_OPTION (pfile, warn_comments)) |
45b966db ZW |
268 | cpp_warning (pfile, "`/*' within comment"); |
269 | ||
270 | prev_c = c; | |
271 | } | |
272 | } | |
273 | ||
274 | /* Skip a C++/Chill line comment. We know it's a comment, and point | |
275 | is at the second character of the initiator. */ | |
276 | static void | |
277 | skip_line_comment (pfile) | |
278 | cpp_reader *pfile; | |
279 | { | |
280 | FORWARD(1); | |
281 | for (;;) | |
282 | { | |
283 | int c = GETC (); | |
284 | ||
285 | /* We don't have to worry about EOF in here. */ | |
286 | if (c == '\n') | |
287 | { | |
288 | /* Don't consider final '\n' to be part of comment. */ | |
289 | FORWARD(-1); | |
290 | return; | |
291 | } | |
292 | else if (c == '\r') | |
293 | { | |
294 | /* \r cannot be a macro escape marker here. */ | |
295 | if (!ACTIVE_MARK_P (pfile)) | |
296 | CPP_BUMP_LINE (pfile); | |
ae79697b | 297 | if (CPP_OPTION (pfile, warn_comments)) |
45b966db ZW |
298 | cpp_warning (pfile, "backslash-newline within line comment"); |
299 | } | |
300 | } | |
301 | } | |
302 | ||
303 | /* Skip a comment - C, C++, or Chill style. M is the first character | |
304 | of the comment marker. If this really is a comment, skip to its | |
305 | end and return ' '. If this is not a comment, return M (which will | |
306 | be '/' or '-'). */ | |
307 | ||
308 | static int | |
309 | skip_comment (pfile, m) | |
310 | cpp_reader *pfile; | |
311 | int m; | |
312 | { | |
313 | if (m == '/' && PEEKC() == '*') | |
314 | { | |
315 | skip_block_comment (pfile); | |
316 | return ' '; | |
317 | } | |
318 | else if (m == '/' && PEEKC() == '/') | |
319 | { | |
320 | if (CPP_BUFFER (pfile)->system_header_p) | |
321 | { | |
322 | /* We silently allow C++ comments in system headers, irrespective | |
323 | of conformance mode, because lots of busted systems do that | |
324 | and trying to clean it up in fixincludes is a nightmare. */ | |
325 | skip_line_comment (pfile); | |
326 | return ' '; | |
327 | } | |
ae79697b | 328 | else if (CPP_OPTION (pfile, cplusplus_comments)) |
45b966db | 329 | { |
ae79697b | 330 | if (CPP_OPTION (pfile, c89) |
45b966db ZW |
331 | && CPP_PEDANTIC (pfile) |
332 | && ! CPP_BUFFER (pfile)->warned_cplusplus_comments) | |
333 | { | |
334 | cpp_pedwarn (pfile, | |
335 | "C++ style comments are not allowed in ISO C89"); | |
336 | cpp_pedwarn (pfile, | |
337 | "(this will be reported only once per input file)"); | |
338 | CPP_BUFFER (pfile)->warned_cplusplus_comments = 1; | |
339 | } | |
340 | skip_line_comment (pfile); | |
341 | return ' '; | |
342 | } | |
343 | else | |
344 | return m; | |
345 | } | |
346 | else if (m == '-' && PEEKC() == '-' | |
ae79697b | 347 | && CPP_OPTION (pfile, chill)) |
45b966db ZW |
348 | { |
349 | skip_line_comment (pfile); | |
350 | return ' '; | |
351 | } | |
352 | else | |
353 | return m; | |
354 | } | |
355 | ||
356 | /* Identical to skip_comment except that it copies the comment into the | |
357 | token_buffer. This is used if !discard_comments. */ | |
358 | static int | |
359 | copy_comment (pfile, m) | |
360 | cpp_reader *pfile; | |
361 | int m; | |
362 | { | |
363 | const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */ | |
364 | const U_CHAR *limit; | |
365 | ||
366 | if (skip_comment (pfile, m) == m) | |
367 | return m; | |
368 | ||
369 | limit = CPP_BUFFER (pfile)->cur; | |
370 | CPP_RESERVE (pfile, limit - start + 2); | |
371 | CPP_PUTC_Q (pfile, m); | |
372 | for (; start <= limit; start++) | |
373 | if (*start != '\r') | |
374 | CPP_PUTC_Q (pfile, *start); | |
375 | ||
376 | return ' '; | |
377 | } | |
378 | ||
379 | /* Skip whitespace \-newline and comments. Does not macro-expand. */ | |
380 | ||
381 | void | |
382 | _cpp_skip_hspace (pfile) | |
383 | cpp_reader *pfile; | |
384 | { | |
385 | int c; | |
386 | while (1) | |
387 | { | |
388 | c = GETC(); | |
389 | if (c == EOF) | |
390 | return; | |
391 | else if (is_hspace(c)) | |
392 | { | |
393 | if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile)) | |
394 | cpp_pedwarn (pfile, "%s in preprocessing directive", | |
395 | c == '\f' ? "formfeed" : "vertical tab"); | |
396 | } | |
397 | else if (c == '\r') | |
398 | { | |
399 | /* \r is a backslash-newline marker if !has_escapes, and | |
400 | a deletable-whitespace or no-reexpansion marker otherwise. */ | |
401 | if (CPP_BUFFER (pfile)->has_escapes) | |
402 | { | |
403 | if (PEEKC() == ' ') | |
404 | FORWARD(1); | |
405 | else | |
406 | break; | |
407 | } | |
408 | else | |
409 | CPP_BUMP_LINE (pfile); | |
410 | } | |
411 | else if (c == '/' || c == '-') | |
412 | { | |
413 | c = skip_comment (pfile, c); | |
414 | if (c != ' ') | |
415 | break; | |
416 | } | |
417 | else | |
418 | break; | |
419 | } | |
420 | FORWARD(-1); | |
421 | } | |
422 | ||
423 | /* Read and discard the rest of the current line. */ | |
424 | ||
425 | void | |
426 | _cpp_skip_rest_of_line (pfile) | |
427 | cpp_reader *pfile; | |
428 | { | |
429 | for (;;) | |
430 | { | |
431 | int c = GETC(); | |
432 | switch (c) | |
433 | { | |
434 | case '\n': | |
435 | FORWARD(-1); | |
436 | case EOF: | |
437 | return; | |
438 | ||
439 | case '\r': | |
440 | if (! CPP_BUFFER (pfile)->has_escapes) | |
441 | CPP_BUMP_LINE (pfile); | |
442 | break; | |
443 | ||
444 | case '\'': | |
445 | case '\"': | |
446 | skip_string (pfile, c); | |
447 | break; | |
448 | ||
449 | case '/': | |
450 | case '-': | |
451 | skip_comment (pfile, c); | |
452 | break; | |
453 | ||
454 | case '\f': | |
455 | case '\v': | |
456 | if (CPP_PEDANTIC (pfile)) | |
457 | cpp_pedwarn (pfile, "%s in preprocessing directive", | |
458 | c == '\f' ? "formfeed" : "vertical tab"); | |
459 | break; | |
460 | ||
461 | } | |
462 | } | |
463 | } | |
464 | ||
465 | /* Parse an identifier starting with C. */ | |
466 | ||
467 | void | |
468 | _cpp_parse_name (pfile, c) | |
469 | cpp_reader *pfile; | |
470 | int c; | |
471 | { | |
472 | for (;;) | |
473 | { | |
474 | if (! is_idchar(c)) | |
475 | { | |
476 | FORWARD (-1); | |
477 | break; | |
478 | } | |
479 | ||
480 | if (c == '$' && CPP_PEDANTIC (pfile)) | |
481 | cpp_pedwarn (pfile, "`$' in identifier"); | |
482 | ||
483 | CPP_RESERVE(pfile, 2); /* One more for final NUL. */ | |
484 | CPP_PUTC_Q (pfile, c); | |
485 | c = GETC(); | |
486 | if (c == EOF) | |
487 | break; | |
488 | } | |
489 | CPP_NUL_TERMINATE_Q (pfile); | |
490 | return; | |
491 | } | |
492 | ||
493 | /* Parse and skip over a string starting with C. A single quoted | |
494 | string is treated like a double -- some programs (e.g., troff) are | |
495 | perverse this way. (However, a single quoted string is not allowed | |
496 | to extend over multiple lines.) */ | |
497 | static void | |
498 | skip_string (pfile, c) | |
499 | cpp_reader *pfile; | |
500 | int c; | |
501 | { | |
502 | long start_line, start_column; | |
503 | cpp_buf_line_and_col (cpp_file_buffer (pfile), &start_line, &start_column); | |
504 | ||
505 | while (1) | |
506 | { | |
507 | int cc = GETC(); | |
508 | switch (cc) | |
509 | { | |
510 | case EOF: | |
511 | cpp_error_with_line (pfile, start_line, start_column, | |
512 | "unterminated string or character constant"); | |
513 | if (pfile->multiline_string_line != start_line | |
514 | && pfile->multiline_string_line != 0) | |
515 | cpp_error_with_line (pfile, | |
516 | pfile->multiline_string_line, -1, | |
517 | "possible real start of unterminated constant"); | |
518 | pfile->multiline_string_line = 0; | |
519 | return; | |
520 | ||
521 | case '\n': | |
522 | CPP_BUMP_LINE (pfile); | |
523 | /* In Fortran and assembly language, silently terminate | |
524 | strings of either variety at end of line. This is a | |
525 | kludge around not knowing where comments are in these | |
526 | languages. */ | |
ae79697b ZW |
527 | if (CPP_OPTION (pfile, lang_fortran) |
528 | || CPP_OPTION (pfile, lang_asm)) | |
45b966db ZW |
529 | { |
530 | FORWARD(-1); | |
531 | return; | |
532 | } | |
533 | /* Character constants may not extend over multiple lines. | |
534 | In Standard C, neither may strings. We accept multiline | |
535 | strings as an extension. */ | |
536 | if (c == '\'') | |
537 | { | |
538 | cpp_error_with_line (pfile, start_line, start_column, | |
539 | "unterminated character constant"); | |
540 | FORWARD(-1); | |
541 | return; | |
542 | } | |
543 | if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0) | |
544 | cpp_pedwarn_with_line (pfile, start_line, start_column, | |
545 | "string constant runs past end of line"); | |
546 | if (pfile->multiline_string_line == 0) | |
547 | pfile->multiline_string_line = start_line; | |
548 | break; | |
549 | ||
550 | case '\r': | |
551 | if (CPP_BUFFER (pfile)->has_escapes) | |
552 | { | |
553 | cpp_ice (pfile, "\\r escape inside string constant"); | |
554 | FORWARD(1); | |
555 | } | |
556 | else | |
557 | /* Backslash newline is replaced by nothing at all. */ | |
558 | CPP_BUMP_LINE (pfile); | |
559 | break; | |
560 | ||
561 | case '\\': | |
562 | FORWARD(1); | |
563 | break; | |
564 | ||
565 | case '\"': | |
566 | case '\'': | |
567 | if (cc == c) | |
568 | return; | |
569 | break; | |
570 | } | |
571 | } | |
572 | } | |
573 | ||
574 | /* Parse a string and copy it to the output. */ | |
575 | ||
576 | static void | |
577 | parse_string (pfile, c) | |
578 | cpp_reader *pfile; | |
579 | int c; | |
580 | { | |
581 | const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */ | |
582 | const U_CHAR *limit; | |
583 | ||
584 | skip_string (pfile, c); | |
585 | ||
586 | limit = CPP_BUFFER (pfile)->cur; | |
587 | CPP_RESERVE (pfile, limit - start + 2); | |
588 | CPP_PUTC_Q (pfile, c); | |
589 | for (; start < limit; start++) | |
590 | if (*start != '\r') | |
591 | CPP_PUTC_Q (pfile, *start); | |
592 | } | |
593 | ||
594 | /* Read an assertion into the token buffer, converting to | |
595 | canonical form: `#predicate(a n swe r)' The next non-whitespace | |
596 | character to read should be the first letter of the predicate. | |
597 | Returns 0 for syntax error, 1 for bare predicate, 2 for predicate | |
598 | with answer (see callers for why). In case of 0, an error has been | |
599 | printed. */ | |
600 | int | |
601 | _cpp_parse_assertion (pfile) | |
602 | cpp_reader *pfile; | |
603 | { | |
604 | int c, dropwhite; | |
605 | _cpp_skip_hspace (pfile); | |
606 | c = PEEKC(); | |
bfab56e7 ZW |
607 | if (c == '\n') |
608 | { | |
609 | cpp_error (pfile, "assertion without predicate"); | |
610 | return 0; | |
611 | } | |
612 | else if (! is_idstart(c)) | |
45b966db ZW |
613 | { |
614 | cpp_error (pfile, "assertion predicate is not an identifier"); | |
615 | return 0; | |
616 | } | |
617 | CPP_PUTC(pfile, '#'); | |
618 | FORWARD(1); | |
619 | _cpp_parse_name (pfile, c); | |
620 | ||
621 | c = PEEKC(); | |
622 | if (c != '(') | |
623 | { | |
624 | if (is_hspace(c) || c == '\r') | |
625 | _cpp_skip_hspace (pfile); | |
626 | c = PEEKC(); | |
627 | } | |
628 | if (c != '(') | |
629 | return 1; | |
630 | ||
631 | CPP_PUTC(pfile, '('); | |
632 | FORWARD(1); | |
633 | dropwhite = 1; | |
634 | while ((c = GETC()) != ')') | |
635 | { | |
636 | if (is_space(c)) | |
637 | { | |
638 | if (! dropwhite) | |
639 | { | |
640 | CPP_PUTC(pfile, ' '); | |
641 | dropwhite = 1; | |
642 | } | |
643 | } | |
644 | else if (c == '\n' || c == EOF) | |
645 | { | |
646 | if (c == '\n') FORWARD(-1); | |
647 | cpp_error (pfile, "un-terminated assertion answer"); | |
648 | return 0; | |
649 | } | |
650 | else if (c == '\r') | |
651 | /* \r cannot be a macro escape here. */ | |
652 | CPP_BUMP_LINE (pfile); | |
653 | else | |
654 | { | |
655 | CPP_PUTC (pfile, c); | |
656 | dropwhite = 0; | |
657 | } | |
658 | } | |
659 | ||
660 | if (pfile->limit[-1] == ' ') | |
661 | pfile->limit[-1] = ')'; | |
662 | else if (pfile->limit[-1] == '(') | |
663 | { | |
664 | cpp_error (pfile, "empty token sequence in assertion"); | |
665 | return 0; | |
666 | } | |
667 | else | |
668 | CPP_PUTC (pfile, ')'); | |
669 | ||
670 | CPP_NUL_TERMINATE (pfile); | |
671 | return 2; | |
672 | } | |
673 | ||
674 | /* Get the next token, and add it to the text in pfile->token_buffer. | |
675 | Return the kind of token we got.  One character is read with GETC and dispatched on; most non-whitespace tokens clear pfile->only_seen_white, and a newline sets it back to 1.  No macro expansion is done here. */ | |
676 | ||
677 | enum cpp_token | |
678 | _cpp_lex_token (pfile) | |
679 | cpp_reader *pfile; | |
680 | { | |
681 | register int c, c2, c3; | |
682 | enum cpp_token token; | |
45b966db ZW |
683 | |
684 | get_next: /* Re-entered after an unrecognized \r escape or a backslash-newline marker has been consumed. */ | |
685 | c = GETC(); | |
686 | switch (c) | |
687 | { | |
688 | case EOF: | |
689 | return CPP_EOF; | |
690 | ||
691 | case '/': | |
692 | if (PEEKC () == '=') | |
693 | goto op2; | |
694 | ||
695 | comment: /* Also reached from the '-' case for a Chill `--' comment. */ | |
ae79697b | 696 | if (CPP_OPTION (pfile, discard_comments)) |
45b966db ZW |
697 | c = skip_comment (pfile, c); |
698 | else | |
699 | c = copy_comment (pfile, c); | |
700 | if (c != ' ') /* Not a comment after all: C is still the marker character. */ | |
701 | goto randomchar; | |
702 | ||
703 | /* Comments are equivalent to spaces. | |
704 | For -traditional, a comment is equivalent to nothing. */ | |
ae79697b | 705 | if (CPP_TRADITIONAL (pfile) || !CPP_OPTION (pfile, discard_comments)) |
45b966db ZW |
706 | return CPP_COMMENT; |
707 | else | |
708 | { | |
709 | CPP_PUTC (pfile, c); | |
710 | return CPP_HSPACE; | |
711 | } | |
712 | ||
713 | case '#': | |
714 | if (pfile->parsing_if_directive) | |
715 | { | |
716 | _cpp_skip_hspace (pfile); | |
bfab56e7 ZW |
717 | if (_cpp_parse_assertion (pfile)) |
718 | return CPP_ASSERTION; | |
719 | goto randomchar; | |
45b966db ZW |
720 | } |
721 | ||
722 | if (pfile->parsing_define_directive && ! CPP_TRADITIONAL (pfile)) | |
723 | { | |
724 | CPP_RESERVE (pfile, 3); /* Room for `##' and the terminating NUL. */ | |
725 | CPP_PUTC_Q (pfile, '#'); | |
726 | CPP_NUL_TERMINATE_Q (pfile); | |
727 | if (PEEKC () != '#') | |
728 | return CPP_STRINGIZE; | |
729 | ||
730 | FORWARD (1); | |
731 | CPP_PUTC_Q (pfile, '#'); | |
732 | CPP_NUL_TERMINATE_Q (pfile); | |
733 | return CPP_TOKPASTE; | |
734 | } | |
735 | ||
736 | if (!pfile->only_seen_white) /* A `#' is a directive only when nothing but whitespace precedes it on the line. */ | |
737 | goto randomchar; | |
45b966db ZW |
738 | return CPP_DIRECTIVE; |
739 | ||
740 | case '\"': | |
741 | case '\'': | |
742 | parse_string (pfile, c); | |
743 | pfile->only_seen_white = 0; | |
744 | return c == '\'' ? CPP_CHAR : CPP_STRING; | |
745 | ||
746 | case '$': | |
ae79697b | 747 | if (!CPP_OPTION (pfile, dollars_in_ident)) |
45b966db ZW |
748 | goto randomchar; |
749 | goto letter; | |
750 | ||
751 | case ':': | |
ae79697b | 752 | if (CPP_OPTION (pfile, cplusplus) && PEEKC () == ':') |
45b966db ZW |
753 | goto op2; |
754 | goto randomchar; | |
755 | ||
756 | case '&': | |
757 | case '+': | |
758 | case '|': | |
759 | c2 = PEEKC (); | |
760 | if (c2 == c || c2 == '=') | |
761 | goto op2; | |
762 | goto randomchar; | |
763 | ||
764 | case '*': | |
765 | case '!': | |
766 | case '%': | |
767 | case '=': | |
768 | case '^': | |
769 | if (PEEKC () == '=') | |
770 | goto op2; | |
771 | goto randomchar; | |
772 | ||
773 | case '-': | |
774 | c2 = PEEKC (); | |
775 | if (c2 == '-') | |
776 | { | |
ae79697b | 777 | if (CPP_OPTION (pfile, chill)) |
45b966db ZW |
778 | goto comment; /* Chill style comment */ |
779 | else | |
780 | goto op2; | |
781 | } | |
782 | else if (c2 == '=') | |
783 | goto op2; | |
784 | else if (c2 == '>') | |
785 | { | |
ae79697b | 786 | if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*') |
45b966db ZW |
787 | { |
788 | /* In C++, there's a ->* operator. */ | |
789 | token = CPP_OTHER; | |
790 | pfile->only_seen_white = 0; | |
791 | CPP_RESERVE (pfile, 4); | |
792 | CPP_PUTC_Q (pfile, c); | |
793 | CPP_PUTC_Q (pfile, GETC ()); | |
794 | CPP_PUTC_Q (pfile, GETC ()); | |
795 | CPP_NUL_TERMINATE_Q (pfile); | |
796 | return token; | |
797 | } | |
798 | goto op2; | |
799 | } | |
800 | goto randomchar; | |
801 | ||
802 | case '<': | |
803 | if (pfile->parsing_include_directive) | |
804 | { | |
805 | for (;;) /* Copy the <FILENAME> text up to and including the closing `>'. */ | |
806 | { | |
807 | CPP_PUTC (pfile, c); | |
808 | if (c == '>') | |
809 | break; | |
810 | c = GETC (); | |
811 | if (c == '\n' || c == EOF) | |
812 | { | |
813 | cpp_error (pfile, | |
814 | "missing '>' in `#include <FILENAME>'"); | |
815 | break; | |
816 | } | |
817 | else if (c == '\r') | |
818 | { | |
819 | if (!CPP_BUFFER (pfile)->has_escapes) | |
820 | { | |
821 | /* Backslash newline is replaced by nothing. */ | |
822 | CPP_ADJUST_WRITTEN (pfile, -1); | |
823 | CPP_BUMP_LINE (pfile); | |
824 | } | |
825 | else | |
826 | { | |
827 | /* We might conceivably get \r- or \r<space> in | |
828 | here. Just delete 'em. */ | |
829 | int d = GETC(); | |
830 | if (d != '-' && d != ' ') | |
831 | cpp_ice (pfile, "unrecognized escape \\r%c", d); | |
832 | CPP_ADJUST_WRITTEN (pfile, -1); | |
833 | } | |
834 | } | |
835 | } | |
836 | return CPP_STRING; | |
837 | } | |
838 | /* else fall through */ | |
839 | case '>': | |
840 | c2 = PEEKC (); | |
841 | if (c2 == '=') | |
842 | goto op2; | |
843 | /* GNU C++ supports MIN and MAX operators <? and >?. */ | |
ae79697b | 844 | if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?')) |
45b966db ZW |
845 | goto randomchar; |
846 | FORWARD(1); | |
847 | CPP_RESERVE (pfile, 4); | |
848 | CPP_PUTC (pfile, c); | |
849 | CPP_PUTC (pfile, c2); | |
850 | c3 = PEEKC (); | |
851 | if (c3 == '=') /* A trailing `=' joins a doubled or `?' operator to form e.g. <<= or >>=. */ | |
852 | CPP_PUTC_Q (pfile, GETC ()); | |
853 | CPP_NUL_TERMINATE_Q (pfile); | |
854 | pfile->only_seen_white = 0; | |
855 | return CPP_OTHER; | |
856 | ||
857 | case '.': | |
858 | c2 = PEEKC (); | |
859 | if (ISDIGIT(c2)) | |
860 | { | |
861 | CPP_RESERVE(pfile, 2); | |
862 | CPP_PUTC_Q (pfile, '.'); | |
863 | c = GETC (); | |
864 | goto number; | |
865 | } | |
866 | ||
867 | /* In C++ there's a .* operator. */ | |
ae79697b | 868 | if (CPP_OPTION (pfile, cplusplus) && c2 == '*') |
45b966db ZW |
869 | goto op2; |
870 | ||
871 | if (c2 == '.' && PEEKN(1) == '.') | |
872 | { | |
873 | CPP_RESERVE(pfile, 4); | |
874 | CPP_PUTC_Q (pfile, '.'); | |
875 | CPP_PUTC_Q (pfile, '.'); | |
876 | CPP_PUTC_Q (pfile, '.'); | |
877 | FORWARD (2); | |
878 | CPP_NUL_TERMINATE_Q (pfile); | |
879 | pfile->only_seen_white = 0; | |
880 | return CPP_3DOTS; | |
881 | } | |
882 | goto randomchar; | |
883 | ||
884 | op2: /* Two-character operator: C followed by the next character in the buffer. */ | |
885 | token = CPP_OTHER; | |
886 | pfile->only_seen_white = 0; | |
887 | CPP_RESERVE(pfile, 3); | |
888 | CPP_PUTC_Q (pfile, c); | |
889 | CPP_PUTC_Q (pfile, GETC ()); | |
890 | CPP_NUL_TERMINATE_Q (pfile); | |
891 | return token; | |
892 | ||
893 | case 'L': | |
894 | c2 = PEEKC (); | |
895 | if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile)) | |
896 | { | |
897 | CPP_PUTC (pfile, c); | |
898 | c = GETC (); | |
899 | parse_string (pfile, c); | |
900 | pfile->only_seen_white = 0; | |
901 | return c == '\'' ? CPP_WCHAR : CPP_WSTRING; | |
902 | } | |
903 | goto letter; | |
904 | ||
905 | case '0': case '1': case '2': case '3': case '4': | |
906 | case '5': case '6': case '7': case '8': case '9': | |
907 | number: | |
908 | c2 = '.'; /* C2 holds the previously copied character; a sign is accepted only right after e/E, or after p/P when not in C89 mode. */ | |
909 | for (;;) | |
910 | { | |
911 | CPP_RESERVE (pfile, 2); | |
912 | CPP_PUTC_Q (pfile, c); | |
913 | c = PEEKC (); | |
914 | if (c == EOF) | |
915 | break; | |
916 | if (!is_numchar(c) && c != '.' | |
917 | && ((c2 != 'e' && c2 != 'E' | |
918 | && ((c2 != 'p' && c2 != 'P') | |
ae79697b | 919 | || CPP_OPTION (pfile, c89))) |
45b966db ZW |
920 | || (c != '+' && c != '-'))) |
921 | break; | |
922 | FORWARD(1); | |
923 | c2= c; | |
924 | } | |
925 | CPP_NUL_TERMINATE_Q (pfile); | |
926 | pfile->only_seen_white = 0; | |
927 | return CPP_NUMBER; | |
928 | case 'b': case 'c': case 'd': case 'h': case 'o': | |
929 | case 'B': case 'C': case 'D': case 'H': case 'O': | |
ae79697b | 930 | if (CPP_OPTION (pfile, chill) && PEEKC () == '\'') |
45b966db ZW |
931 | { |
932 | pfile->only_seen_white = 0; | |
933 | CPP_RESERVE (pfile, 2); | |
934 | CPP_PUTC_Q (pfile, c); | |
935 | CPP_PUTC_Q (pfile, '\''); | |
936 | FORWARD(1); | |
937 | for (;;) | |
938 | { | |
939 | c = GETC(); | |
940 | if (c == EOF) | |
941 | goto chill_number_eof; | |
942 | if (!is_numchar(c)) | |
943 | break; | |
944 | CPP_PUTC (pfile, c); | |
945 | } | |
946 | if (c == '\'') | |
947 | { | |
948 | CPP_RESERVE (pfile, 2); | |
949 | CPP_PUTC_Q (pfile, c); | |
950 | CPP_NUL_TERMINATE_Q (pfile); | |
951 | return CPP_STRING; | |
952 | } | |
953 | else | |
954 | { | |
955 | FORWARD(-1); | |
956 | chill_number_eof: | |
957 | CPP_NUL_TERMINATE (pfile); | |
958 | return CPP_NUMBER; | |
959 | } | |
960 | } | |
961 | else | |
962 | goto letter; | |
963 | case '_': | |
964 | case 'a': case 'e': case 'f': case 'g': case 'i': case 'j': | |
965 | case 'k': case 'l': case 'm': case 'n': case 'p': case 'q': | |
966 | case 'r': case 's': case 't': case 'u': case 'v': case 'w': | |
967 | case 'x': case 'y': case 'z': | |
968 | case 'A': case 'E': case 'F': case 'G': case 'I': case 'J': | |
969 | case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R': | |
970 | case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': | |
971 | case 'Y': case 'Z': | |
972 | letter: | |
973 | pfile->only_seen_white = 0; | |
974 | _cpp_parse_name (pfile, c); | |
975 | return CPP_MACRO; /* Every identifier lexed here is reported as CPP_MACRO; NOTE(review): presumably the caller decides whether it really names a macro -- confirm. */ | |
976 | ||
977 | case ' ': case '\t': case '\v': | |
978 | for (;;) /* Copy the whole run of horizontal whitespace as one token. */ | |
979 | { | |
980 | CPP_PUTC (pfile, c); | |
981 | c = PEEKC (); | |
982 | if (c == EOF || !is_hspace(c)) | |
983 | break; | |
984 | FORWARD(1); | |
985 | } | |
986 | return CPP_HSPACE; | |
987 | ||
988 | case '\r': | |
989 | if (CPP_BUFFER (pfile)->has_escapes) | |
990 | { | |
991 | c = GETC (); | |
992 | if (c == '-') | |
993 | { | |
994 | if (pfile->output_escapes) | |
995 | CPP_PUTS (pfile, "\r-", 2); | |
996 | _cpp_parse_name (pfile, GETC ()); | |
997 | return CPP_NAME; | |
998 | } | |
999 | else if (c == ' ') | |
1000 | { | |
1001 | CPP_RESERVE (pfile, 2); | |
1002 | if (pfile->output_escapes) | |
1003 | CPP_PUTC_Q (pfile, '\r'); | |
1004 | CPP_PUTC_Q (pfile, c); | |
1005 | return CPP_HSPACE; | |
1006 | } | |
1007 | else | |
1008 | { | |
1009 | cpp_ice (pfile, "unrecognized escape \\r%c", c); | |
1010 | goto get_next; | |
1011 | } | |
1012 | } | |
1013 | else | |
1014 | { | |
1015 | /* Backslash newline is ignored. */ | |
1016 | CPP_BUMP_LINE (pfile); | |
1017 | goto get_next; | |
1018 | } | |
1019 | ||
1020 | case '\n': | |
1021 | CPP_PUTC (pfile, c); | |
1022 | if (pfile->only_seen_white == 0) | |
1023 | pfile->only_seen_white = 1; | |
1024 | CPP_BUMP_LINE (pfile); | |
ae79697b | 1025 | if (! CPP_OPTION (pfile, no_line_commands)) |
45b966db ZW |
1026 | { |
1027 | pfile->lineno++; | |
1028 | if (CPP_BUFFER (pfile)->lineno != pfile->lineno) /* The reader's own count has drifted from the buffer's count. */ | |
1029 | _cpp_output_line_command (pfile, same_file); | |
1030 | } | |
1031 | return CPP_VSPACE; | |
1032 | ||
1033 | case '(': token = CPP_LPAREN; goto char1; | |
1034 | case ')': token = CPP_RPAREN; goto char1; | |
1035 | case '{': token = CPP_LBRACE; goto char1; | |
1036 | case '}': token = CPP_RBRACE; goto char1; | |
1037 | case ',': token = CPP_COMMA; goto char1; | |
1038 | case ';': token = CPP_SEMICOLON; goto char1; | |
1039 | ||
1040 | randomchar: /* Any character without special handling becomes a one-character CPP_OTHER token. */ | |
1041 | default: | |
1042 | token = CPP_OTHER; | |
1043 | char1: | |
1044 | pfile->only_seen_white = 0; | |
1045 | CPP_PUTC (pfile, c); | |
1046 | return token; | |
1047 | } | |
1048 | } | |
1049 | ||
1050 | /* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile). | |
1051 | Caller is expected to have checked no_macro_expand. */ | |
1052 | static int | |
1053 | maybe_macroexpand (pfile, written) | |
1054 | cpp_reader *pfile; | |
1055 | long written; | |
1056 | { | |
1057 | U_CHAR *macro = pfile->token_buffer + written; | |
1058 | size_t len = CPP_WRITTEN (pfile) - written; | |
1059 | HASHNODE *hp = _cpp_lookup (pfile, macro, len); | |
1060 | ||
1061 | if (!hp) | |
1062 | return 0; | |
1063 | if (hp->type == T_DISABLED) | |
1064 | { | |
1065 | if (pfile->output_escapes) | |
1066 | { | |
1067 | /* Insert a no-reexpand marker before IDENT. */ | |
1068 | CPP_RESERVE (pfile, 2); | |
1069 | CPP_ADJUST_WRITTEN (pfile, 2); | |
1070 | macro = pfile->token_buffer + written; | |
1071 | ||
1072 | memmove (macro + 2, macro, len); | |
1073 | macro[0] = '\r'; | |
1074 | macro[1] = '-'; | |
1075 | } | |
1076 | return 0; | |
1077 | } | |
1078 | ||
1079 | /* If macro wants an arglist, verify that a '(' follows. */ | |
1080 | if (hp->type == T_MACRO && hp->value.defn->nargs >= 0) | |
1081 | { | |
1082 | int macbuf_whitespace = 0; | |
1083 | int c; | |
1084 | ||
1085 | while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile))) | |
1086 | { | |
1087 | const U_CHAR *point = CPP_BUFFER (pfile)->cur; | |
1088 | for (;;) | |
1089 | { | |
1090 | _cpp_skip_hspace (pfile); | |
1091 | c = PEEKC (); | |
1092 | if (c == '\n') | |
1093 | FORWARD(1); | |
1094 | else | |
1095 | break; | |
1096 | } | |
1097 | if (point != CPP_BUFFER (pfile)->cur) | |
1098 | macbuf_whitespace = 1; | |
1099 | if (c == '(') | |
1100 | goto is_macro_call; | |
1101 | else if (c != EOF) | |
1102 | goto not_macro_call; | |
1103 | cpp_pop_buffer (pfile); | |
1104 | } | |
1105 | ||
1106 | CPP_SET_MARK (pfile); | |
1107 | for (;;) | |
1108 | { | |
1109 | _cpp_skip_hspace (pfile); | |
1110 | c = PEEKC (); | |
1111 | if (c == '\n') | |
1112 | FORWARD(1); | |
1113 | else | |
1114 | break; | |
1115 | } | |
1116 | CPP_GOTO_MARK (pfile); | |
1117 | ||
1118 | if (c != '(') | |
1119 | { | |
1120 | not_macro_call: | |
1121 | if (macbuf_whitespace) | |
1122 | CPP_PUTC (pfile, ' '); | |
1123 | return 0; | |
1124 | } | |
1125 | } | |
1126 | ||
1127 | is_macro_call: | |
1128 | /* This is now known to be a macro call. | |
1129 | Expand the macro, reading arguments as needed, | |
1130 | and push the expansion on the input stack. */ | |
1131 | _cpp_macroexpand (pfile, hp); | |
1132 | CPP_SET_WRITTEN (pfile, written); | |
1133 | return 1; | |
1134 | } | |
1135 | ||
1136 | enum cpp_token | |
1137 | cpp_get_token (pfile) | |
1138 | cpp_reader *pfile; | |
1139 | { | |
1140 | enum cpp_token token; | |
1141 | long written = CPP_WRITTEN (pfile); | |
1142 | ||
1143 | get_next: | |
1144 | token = _cpp_lex_token (pfile); | |
1145 | ||
1146 | switch (token) | |
1147 | { | |
1148 | default: | |
1149 | return token; | |
1150 | ||
1151 | case CPP_DIRECTIVE: | |
1152 | if (_cpp_handle_directive (pfile)) | |
1153 | return CPP_DIRECTIVE; | |
1154 | pfile->only_seen_white = 0; | |
1155 | CPP_PUTC (pfile, '#'); | |
1156 | return CPP_OTHER; | |
1157 | ||
1158 | case CPP_MACRO: | |
1159 | if (! pfile->no_macro_expand | |
1160 | && maybe_macroexpand (pfile, written)) | |
1161 | goto get_next; | |
1162 | return CPP_NAME; | |
1163 | ||
1164 | case CPP_EOF: | |
1165 | if (CPP_BUFFER (pfile)->manual_pop) | |
1166 | /* If we've been reading from redirected input, the | |
1167 | frontend will pop the buffer. */ | |
1168 | return CPP_EOF; | |
1169 | else if (CPP_BUFFER (pfile)->seen_eof) | |
1170 | { | |
1171 | if (CPP_PREV_BUFFER (CPP_BUFFER (pfile)) == NULL) | |
1172 | return CPP_EOF; | |
1173 | ||
1174 | cpp_pop_buffer (pfile); | |
1175 | goto get_next; | |
1176 | } | |
1177 | else | |
1178 | { | |
1179 | _cpp_handle_eof (pfile); | |
1180 | return CPP_POP; | |
1181 | } | |
1182 | } | |
1183 | } | |
1184 | ||
1185 | /* Like cpp_get_token, but skip spaces and comments. */ | |
1186 | ||
1187 | enum cpp_token | |
1188 | cpp_get_non_space_token (pfile) | |
1189 | cpp_reader *pfile; | |
1190 | { | |
1191 | int old_written = CPP_WRITTEN (pfile); | |
1192 | for (;;) | |
1193 | { | |
1194 | enum cpp_token token = cpp_get_token (pfile); | |
1195 | if (token != CPP_COMMENT && token != CPP_POP | |
1196 | && token != CPP_HSPACE && token != CPP_VSPACE) | |
1197 | return token; | |
1198 | CPP_SET_WRITTEN (pfile, old_written); | |
1199 | } | |
1200 | } | |
1201 | ||
1202 | /* Like cpp_get_token, except that it does not read past end-of-line. | |
1203 | Also, horizontal space is skipped, and macros are popped. */ | |
1204 | ||
1205 | enum cpp_token | |
1206 | _cpp_get_directive_token (pfile) | |
1207 | cpp_reader *pfile; | |
1208 | { | |
1209 | long old_written = CPP_WRITTEN (pfile); | |
1210 | enum cpp_token token; | |
1211 | ||
1212 | for (;;) | |
1213 | { | |
1214 | _cpp_skip_hspace (pfile); | |
1215 | if (PEEKC () == '\n') | |
1216 | return CPP_VSPACE; | |
1217 | ||
1218 | token = cpp_get_token (pfile); | |
1219 | /* token could be hspace at the beginning of a macro. */ | |
1220 | if (token == CPP_HSPACE || token == CPP_COMMENT) | |
1221 | { | |
1222 | CPP_SET_WRITTEN (pfile, old_written); | |
1223 | continue; | |
1224 | } | |
1225 | ||
1226 | /* token cannot be vspace, it would have been caught above. */ | |
1227 | if (token == CPP_VSPACE) | |
1228 | { | |
1229 | cpp_ice (pfile, "VSPACE in get_directive_token"); | |
1230 | return token; | |
1231 | } | |
1232 | ||
1233 | /* token cannot be POP unless the buffer is a macro buffer. */ | |
1234 | if (token != CPP_POP) | |
1235 | return token; | |
1236 | ||
1237 | if (! CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile))) | |
1238 | { | |
1239 | cpp_ice (pfile, "POP of file buffer in get_directive_token"); | |
1240 | return token; | |
1241 | } | |
1242 | ||
1243 | /* We must pop the buffer by hand, or else cpp_get_token might | |
1244 | hand us white space or newline on the next invocation. */ | |
1245 | cpp_pop_buffer (pfile); | |
1246 | } | |
1247 | } | |
1248 | ||
1249 | /* Determine the current line and column. Used only by read_and_prescan. */ | |
1250 | static U_CHAR * | |
1251 | find_position (start, limit, linep) | |
1252 | U_CHAR *start; | |
1253 | U_CHAR *limit; | |
1254 | unsigned long *linep; | |
1255 | { | |
1256 | unsigned long line = *linep; | |
1257 | U_CHAR *lbase = start; | |
1258 | while (start < limit) | |
1259 | { | |
1260 | U_CHAR ch = *start++; | |
1261 | if (ch == '\n' || ch == '\r') | |
1262 | { | |
1263 | line++; | |
1264 | lbase = start; | |
1265 | } | |
1266 | } | |
1267 | *linep = line; | |
1268 | return lbase; | |
1269 | } | |
1270 | ||
46d07497 ZW |
/* Lookup tables for _cpp_read_and_prescan, indexed by unsigned char.
   With designated initializers they are constant data and the
   init_* "functions" are empty macros; without them the tables are
   zero-initialized and filled in at run time by init_speccase and
   init_trigraph_map, which _cpp_init_input_buffer calls.  */

#ifndef UCHAR_MAX
#define UCHAR_MAX 255	/* assume 8-bit bytes */
#endif

#if (GCC_VERSION >= 2007) || (__STDC_VERSION__ >= 199901L)

/* Designated initializers: each s(p, v) is one table entry.  */
#define CHARTAB(name) static const unsigned char name[UCHAR_MAX + 1]
#define SPECCASE CHARTAB(speccase) = {
#define TRIGRAPH_MAP CHARTAB(trigraph_map) = {
#define END };
#define s(p, v) [p] = v,
#define init_speccase() /* nothing */
#define init_trigraph_map() /* nothing */

#else

/* No designated initializers: each s(p, v) is an assignment inside
   the body of the matching init function.  */
#define CHARTAB(name) static unsigned char name[UCHAR_MAX + 1]
#define SPECCASE CHARTAB(speccase) = { 0 };			\
  static void init_speccase PARAMS ((void)) {			\
  unsigned char *tab = speccase;
#define TRIGRAPH_MAP CHARTAB(trigraph_map) = { 0 };		\
  static void init_trigraph_map PARAMS ((void)) {		\
  unsigned char *tab = trigraph_map;
#define END }
#define s(p, v) tab[p] = v;

#endif

/* Classes of characters that can't be handled in the inner loop.
   Keep these contiguous to optimize the performance of the code
   generated for the switch that uses them.  */
#define SPECCASE_EMPTY     0
#define SPECCASE_CR        1
#define SPECCASE_BACKSLASH 2
#define SPECCASE_QUESTION  3

SPECCASE
  s('\r', SPECCASE_CR)
  s('\\', SPECCASE_BACKSLASH)
  s('?',  SPECCASE_QUESTION)
END

/* Map from the third character of a trigraph to its replacement; zero
   for characters that do not complete a trigraph.  */

TRIGRAPH_MAP
  s('=',  '#')
  s(')',  ']')
  s('!',  '|')
  s('(',  '[')
  s('\'', '^')
  s('>',  '}')
  s('/',  '\\')
  s('<',  '{')
  s('-',  '~')
END

#undef CHARTAB
#undef SPECCASE
#undef TRIGRAPH_MAP
#undef END
#undef s
1326 | ||
45b966db ZW |
1327 | /* Read the entire contents of file DESC into buffer BUF. LEN is how |
1328 | much memory to allocate initially; more will be allocated if | |
1329 | necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to | |
1330 | canonical form (\n). If enabled, convert and/or warn about | |
1331 | trigraphs. Convert backslash-newline to a one-character escape | |
1332 | (\r) and remove it from "embarrassing" places (i.e. the middle of a | |
1333 | token). If there is no newline at the end of the file, add one and | |
1334 | warn. Returns -1 on failure, or the actual length of the data to | |
1335 | be scanned. | |
1336 | ||
1337 | This function does a lot of work, and can be a serious performance | |
1338 | bottleneck. It has been tuned heavily; make sure you understand it | |
1339 | before hacking. The common case - no trigraphs, Unix style line | |
1340 | breaks, backslash-newline set off by whitespace, newline at EOF - | |
1341 | has been optimized at the expense of the others. The performance | |
1342 | penalty for DOS style line breaks (\r\n) is about 15%. | |
1343 | ||
1344 | Warnings lose particularly heavily since we have to determine the | |
1345 | line number, which involves scanning from the beginning of the file | |
1346 | or from the last warning. The penalty for the absence of a newline | |
1347 | at the end of reload1.c is about 60%. (reload1.c is 329k.) | |
1348 | ||
1349 | If your file has more than one kind of end-of-line marker, you | |
04e3ec78 NB |
1350 | will get messed-up line numbering. |
1351 | ||
1352 | So that the cases of the switch statement do not have to concern | |
1353 | themselves with the complications of reading beyond the end of the | |
1354 | buffer, the buffer is guaranteed to have at least 3 characters in | |
1355 | it (or however many are left in the file, if less) on entry to the | |
1356 | switch. This is enough to handle trigraphs and the "\\\n\r" and | |
1357 | "\\\r\n" cases. | |
1358 | ||
1359 | The end of the buffer is marked by a '\\', which, being a special | |
1360 | character, guarantees we will exit the fast-scan loops and perform | |
1361 | a refill. */ | |
46d07497 | 1362 | |
45b966db ZW |
1363 | long |
1364 | _cpp_read_and_prescan (pfile, fp, desc, len) | |
1365 | cpp_reader *pfile; | |
1366 | cpp_buffer *fp; | |
1367 | int desc; | |
1368 | size_t len; | |
1369 | { | |
1370 | U_CHAR *buf = (U_CHAR *) xmalloc (len); | |
1371 | U_CHAR *ip, *op, *line_base; | |
1372 | U_CHAR *ibase; | |
45b966db ZW |
1373 | unsigned long line; |
1374 | unsigned int deferred_newlines; | |
45b966db | 1375 | size_t offset; |
04e3ec78 | 1376 | int count = 0; |
45b966db ZW |
1377 | |
1378 | offset = 0; | |
04e3ec78 | 1379 | deferred_newlines = 0; |
45b966db ZW |
1380 | op = buf; |
1381 | line_base = buf; | |
1382 | line = 1; | |
04e3ec78 NB |
1383 | ibase = pfile->input_buffer + 3; |
1384 | ip = ibase; | |
1385 | ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */ | |
45b966db ZW |
1386 | |
1387 | for (;;) | |
1388 | { | |
04e3ec78 NB |
1389 | U_CHAR *near_buff_end; |
1390 | ||
1391 | /* Copy previous char plus unprocessed (at most 2) chars | |
1392 | to beginning of buffer, refill it with another | |
1393 | read(), and continue processing */ | |
1394 | memcpy(ip - count - 1, ip - 1, 3); | |
1395 | ip -= count; | |
45b966db | 1396 | |
04e3ec78 | 1397 | count = read (desc, ibase, pfile->input_buffer_len); |
45b966db ZW |
1398 | if (count < 0) |
1399 | goto error; | |
04e3ec78 NB |
1400 | |
1401 | ibase[count] = '\\'; /* Marks end of buffer */ | |
1402 | if (count) | |
45b966db | 1403 | { |
04e3ec78 NB |
1404 | near_buff_end = pfile->input_buffer + count; |
1405 | offset += count; | |
45b966db | 1406 | if (offset > len) |
04e3ec78 NB |
1407 | { |
1408 | size_t delta_op; | |
1409 | size_t delta_line_base; | |
1410 | len *= 2; | |
1411 | if (offset > len) | |
1412 | /* len overflowed. | |
1413 | This could happen if the file is larger than half the | |
1414 | maximum address space of the machine. */ | |
1415 | goto too_big; | |
1416 | ||
1417 | delta_op = op - buf; | |
1418 | delta_line_base = line_base - buf; | |
1419 | buf = (U_CHAR *) xrealloc (buf, len); | |
1420 | op = buf + delta_op; | |
1421 | line_base = buf + delta_line_base; | |
1422 | } | |
1423 | } | |
1424 | else | |
1425 | { | |
1426 | if (ip == ibase) | |
1427 | break; | |
1428 | /* Allow normal processing of the (at most 2) remaining | |
1429 | characters. The end-of-buffer marker is still present | |
1430 | and prevents false matches within the switch. */ | |
1431 | near_buff_end = ibase - 1; | |
45b966db ZW |
1432 | } |
1433 | ||
1434 | for (;;) | |
1435 | { | |
04e3ec78 | 1436 | unsigned int span; |
45b966db | 1437 | |
04e3ec78 | 1438 | /* Deal with \-newline, potentially in the middle of a token. */ |
45b966db ZW |
1439 | if (deferred_newlines) |
1440 | { | |
04e3ec78 NB |
1441 | if (op != buf && op[-1] != ' ' && op[-1] != '\n' && op[-1] != '\t' && op[-1] != '\r') |
1442 | { | |
1443 | /* Previous was not white space. Skip to white | |
1444 | space, if we can, before outputting the \r's */ | |
1445 | span = 0; | |
1446 | while (ip[span] != ' ' | |
1447 | && ip[span] != '\t' | |
1448 | && ip[span] != '\n' | |
1449 | && speccase[ip[span]] == SPECCASE_EMPTY) | |
1450 | span++; | |
1451 | memcpy (op, ip, span); | |
1452 | op += span; | |
1453 | ip += span; | |
1454 | if (speccase[ip[0]] != SPECCASE_EMPTY) | |
1455 | goto do_speccase; | |
1456 | } | |
1457 | while (deferred_newlines) | |
1458 | deferred_newlines--, *op++ = '\r'; | |
45b966db ZW |
1459 | } |
1460 | ||
1461 | /* Copy as much as we can without special treatment. */ | |
04e3ec78 | 1462 | span = 0; |
45b966db ZW |
1463 | while (speccase[ip[span]] == SPECCASE_EMPTY) span++; |
1464 | memcpy (op, ip, span); | |
1465 | op += span; | |
1466 | ip += span; | |
1467 | ||
04e3ec78 NB |
1468 | do_speccase: |
1469 | if (ip > near_buff_end) /* Do we have enough chars? */ | |
1470 | break; | |
45b966db ZW |
1471 | switch (speccase[*ip++]) |
1472 | { | |
45b966db | 1473 | case SPECCASE_CR: /* \r */ |
04e3ec78 | 1474 | if (ip[-2] != '\n') |
45b966db | 1475 | { |
04e3ec78 NB |
1476 | if (*ip == '\n') |
1477 | ip++; | |
1478 | *op++ = '\n'; | |
45b966db | 1479 | } |
45b966db ZW |
1480 | break; |
1481 | ||
1482 | case SPECCASE_BACKSLASH: /* \ */ | |
04e3ec78 | 1483 | if (*ip == '\n') |
45b966db | 1484 | { |
04e3ec78 | 1485 | deferred_newlines++; |
45b966db ZW |
1486 | ip++; |
1487 | if (*ip == '\r') ip++; | |
45b966db ZW |
1488 | } |
1489 | else if (*ip == '\r') | |
1490 | { | |
04e3ec78 | 1491 | deferred_newlines++; |
45b966db ZW |
1492 | ip++; |
1493 | if (*ip == '\n') ip++; | |
45b966db ZW |
1494 | } |
1495 | else | |
1496 | *op++ = '\\'; | |
04e3ec78 | 1497 | break; |
45b966db ZW |
1498 | |
1499 | case SPECCASE_QUESTION: /* ? */ | |
1500 | { | |
1501 | unsigned int d, t; | |
04e3ec78 NB |
1502 | |
1503 | *op++ = '?'; /* Normal non-trigraph case */ | |
1504 | if (ip[0] != '?') | |
1505 | break; | |
1506 | ||
45b966db | 1507 | d = ip[1]; |
04e3ec78 NB |
1508 | t = trigraph_map[d]; |
1509 | if (t == 0) | |
1510 | break; | |
45b966db | 1511 | |
ae79697b | 1512 | if (CPP_OPTION (pfile, warn_trigraphs)) |
45b966db ZW |
1513 | { |
1514 | unsigned long col; | |
1515 | line_base = find_position (line_base, op, &line); | |
1516 | col = op - line_base + 1; | |
ae79697b | 1517 | if (CPP_OPTION (pfile, trigraphs)) |
45b966db | 1518 | cpp_warning_with_line (pfile, line, col, |
04e3ec78 | 1519 | "trigraph ??%c converted to %c", d, t); |
45b966db ZW |
1520 | else |
1521 | cpp_warning_with_line (pfile, line, col, | |
04e3ec78 | 1522 | "trigraph ??%c ignored", d); |
45b966db | 1523 | } |
04e3ec78 NB |
1524 | |
1525 | ip += 2; | |
ae79697b | 1526 | if (CPP_OPTION (pfile, trigraphs)) |
45b966db | 1527 | { |
04e3ec78 | 1528 | op[-1] = t; /* Overwrite '?' */ |
45b966db | 1529 | if (t == '\\') |
04e3ec78 NB |
1530 | { |
1531 | op--; | |
1532 | *--ip = '\\'; | |
1533 | goto do_speccase; /* May need buffer refill */ | |
1534 | } | |
45b966db ZW |
1535 | } |
1536 | else | |
1537 | { | |
45b966db ZW |
1538 | *op++ = '?'; |
1539 | *op++ = d; | |
1540 | } | |
1541 | } | |
04e3ec78 | 1542 | break; |
45b966db ZW |
1543 | } |
1544 | } | |
1545 | } | |
1546 | ||
1547 | if (offset == 0) | |
1548 | return 0; | |
1549 | ||
45b966db ZW |
1550 | if (op[-1] != '\n') |
1551 | { | |
1552 | unsigned long col; | |
1553 | line_base = find_position (line_base, op, &line); | |
1554 | col = op - line_base + 1; | |
1555 | cpp_warning_with_line (pfile, line, col, "no newline at end of file\n"); | |
1556 | if (offset + 1 > len) | |
1557 | { | |
1558 | len += 1; | |
1559 | if (offset + 1 > len) | |
1560 | goto too_big; | |
1561 | buf = (U_CHAR *) xrealloc (buf, len); | |
1562 | op = buf + offset; | |
1563 | } | |
1564 | *op++ = '\n'; | |
1565 | } | |
1566 | ||
1567 | fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf)); | |
1568 | return op - buf; | |
1569 | ||
1570 | too_big: | |
1571 | cpp_error (pfile, "file is too large (>%lu bytes)\n", (unsigned long)offset); | |
1572 | free (buf); | |
1573 | return -1; | |
1574 | ||
1575 | error: | |
1576 | cpp_error_from_errno (pfile, fp->ihash->name); | |
1577 | free (buf); | |
1578 | return -1; | |
1579 | } | |
1580 | ||
46d07497 ZW |
1581 | /* Allocate pfile->input_buffer, and initialize speccase[] and |
1582 | trigraph_map[] if it hasn't happened already. */ | |
1583 | ||
45b966db ZW |
1584 | void |
1585 | _cpp_init_input_buffer (pfile) | |
1586 | cpp_reader *pfile; | |
1587 | { | |
1588 | U_CHAR *tmp; | |
1589 | ||
46d07497 ZW |
1590 | init_speccase (); |
1591 | init_trigraph_map (); | |
04e3ec78 | 1592 | |
45b966db ZW |
1593 | /* Determine the appropriate size for the input buffer. Normal C |
1594 | source files are smaller than eight K. */ | |
04e3ec78 NB |
1595 | /* 8Kbytes of buffer proper, 1 to detect running off the end without |
1596 | address arithmetic all the time, and 3 for pushback during buffer | |
1597 | refill, in case there's a potential trigraph or end-of-line | |
1598 | digraph at the end of a block. */ | |
45b966db | 1599 | |
04e3ec78 | 1600 | tmp = (U_CHAR *) xmalloc (8192 + 1 + 3); |
45b966db ZW |
1601 | pfile->input_buffer = tmp; |
1602 | pfile->input_buffer_len = 8192; | |
1603 | } |