]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
20000502-1.c: New test.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23#include "config.h"
24#include "system.h"
25#include "intl.h"
26#include "cpplib.h"
27#include "cpphash.h"
28
ff2b53ef
ZW
/* Raw character access on a cpp_buffer.

   GETBUF consumes and yields the next character, or yields EOF (without
   moving the cursor) once the cursor has reached `rlimit'.
   PEEKBUF yields the character N positions ahead of the cursor without
   consuming anything, or EOF when that position is not before `rlimit'.
   FORWARDBUF moves the cursor by N characters; N may be negative, and no
   bounds check is made.  */
#define GETBUF(BUFFER) \
  ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
#define PEEKBUF(BUFFER, N) \
  ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
#define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))

/* The same operations applied to the current buffer of the reader that
   is named `pfile' in the enclosing scope.  */
#define GETC() GETBUF (CPP_BUFFER (pfile))
#define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
#define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
#define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
45b966db
ZW
39
40static void skip_block_comment PARAMS ((cpp_reader *));
41static void skip_line_comment PARAMS ((cpp_reader *));
42static int maybe_macroexpand PARAMS ((cpp_reader *, long));
43static int skip_comment PARAMS ((cpp_reader *, int));
44static int copy_comment PARAMS ((cpp_reader *, int));
45static void skip_string PARAMS ((cpp_reader *, int));
46static void parse_string PARAMS ((cpp_reader *, int));
47static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
48static int null_cleanup PARAMS ((cpp_buffer *, cpp_reader *));
64aaf407 49static void null_warning PARAMS ((cpp_reader *, unsigned int));
45b966db 50
f2d5f0cc
ZW
51static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
52 size_t, FILE *));
1368ee70
ZW
53static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
54 unsigned int));
55static void bump_column PARAMS ((cpp_printer *, unsigned int,
56 unsigned int));
c5a04734 57static void expand_name_space PARAMS ((cpp_toklist *, unsigned int));
1368ee70
ZW
58static void expand_token_space PARAMS ((cpp_toklist *));
59static void init_token_list PARAMS ((cpp_reader *, cpp_toklist *, int));
9e62c811
ZW
60static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
61 unsigned int));
f2d5f0cc 62
c5a04734
ZW
63#define auto_expand_name_space(list) \
64 expand_name_space ((list), (list)->name_cap / 2)
65
45b966db
ZW
66/* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
67
68void
69_cpp_grow_token_buffer (pfile, n)
70 cpp_reader *pfile;
71 long n;
72{
73 long old_written = CPP_WRITTEN (pfile);
74 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
75 pfile->token_buffer = (U_CHAR *)
76 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
77 CPP_SET_WRITTEN (pfile, old_written);
78}
79
80static int
81null_cleanup (pbuf, pfile)
82 cpp_buffer *pbuf ATTRIBUTE_UNUSED;
83 cpp_reader *pfile ATTRIBUTE_UNUSED;
84{
85 return 0;
86}
87
88/* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
89 If BUFFER != NULL, then use the LENGTH characters in BUFFER
90 as the new input buffer.
91 Return the new buffer, or NULL on failure. */
92
93cpp_buffer *
94cpp_push_buffer (pfile, buffer, length)
95 cpp_reader *pfile;
96 const U_CHAR *buffer;
97 long length;
98{
99 cpp_buffer *buf = CPP_BUFFER (pfile);
100 cpp_buffer *new;
101 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
102 {
103 cpp_fatal (pfile, "macro or `#include' recursion too deep");
104 return NULL;
105 }
106
107 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
108
109 new->if_stack = pfile->if_stack;
110 new->cleanup = null_cleanup;
111 new->buf = new->cur = buffer;
ff2b53ef 112 new->rlimit = buffer + length;
45b966db 113 new->prev = buf;
ff2b53ef 114 new->mark = NULL;
45b966db
ZW
115 new->line_base = NULL;
116
117 CPP_BUFFER (pfile) = new;
118 return new;
119}
120
121cpp_buffer *
122cpp_pop_buffer (pfile)
123 cpp_reader *pfile;
124{
125 cpp_buffer *buf = CPP_BUFFER (pfile);
126 if (ACTIVE_MARK_P (pfile))
127 cpp_ice (pfile, "mark active in cpp_pop_buffer");
128 (*buf->cleanup) (buf, pfile);
129 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
130 free (buf);
131 pfile->buffer_stack_depth--;
132 return CPP_BUFFER (pfile);
133}
134
f2d5f0cc
ZW
135/* Deal with the annoying semantics of fwrite. */
136static void
137safe_fwrite (pfile, buf, len, fp)
138 cpp_reader *pfile;
139 const U_CHAR *buf;
140 size_t len;
141 FILE *fp;
142{
143 size_t count;
45b966db 144
f2d5f0cc
ZW
145 while (len)
146 {
147 count = fwrite (buf, 1, len, fp);
148 if (count == 0)
149 goto error;
150 len -= count;
151 buf += count;
152 }
153 return;
154
155 error:
156 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
157}
158
159/* Notify the compiler proper that the current line number has jumped,
160 or the current file name has changed. */
161
162static void
1368ee70 163output_line_command (pfile, print, line)
45b966db 164 cpp_reader *pfile;
f2d5f0cc 165 cpp_printer *print;
1368ee70 166 unsigned int line;
45b966db 167{
1368ee70 168 cpp_buffer *ip = cpp_file_buffer (pfile);
f2d5f0cc
ZW
169 enum { same = 0, enter, leave, rname } change;
170 static const char * const codes[] = { "", " 1", " 2", "" };
171
172 if (CPP_OPTION (pfile, no_line_commands))
173 return;
174
f2d5f0cc
ZW
175 /* Determine whether the current filename has changed, and if so,
176 how. 'nominal_fname' values are unique, so they can be compared
177 by comparing pointers. */
178 if (ip->nominal_fname == print->last_fname)
179 change = same;
180 else
45b966db 181 {
f2d5f0cc
ZW
182 if (pfile->buffer_stack_depth == print->last_bsd)
183 change = rname;
184 else
45b966db 185 {
f2d5f0cc
ZW
186 if (pfile->buffer_stack_depth > print->last_bsd)
187 change = enter;
188 else
189 change = leave;
190 print->last_bsd = pfile->buffer_stack_depth;
45b966db 191 }
f2d5f0cc 192 print->last_fname = ip->nominal_fname;
45b966db 193 }
f2d5f0cc
ZW
194 /* If the current file has not changed, we can output a few newlines
195 instead if we want to increase the line number by a small amount.
196 We cannot do this if print->lineno is zero, because that means we
197 haven't output any line commands yet. (The very first line
198 command output is a `same_file' command.) */
199 if (change == same && print->lineno != 0
200 && line >= print->lineno && line < print->lineno + 8)
45b966db 201 {
f2d5f0cc 202 while (line > print->lineno)
45b966db 203 {
f2d5f0cc
ZW
204 putc ('\n', print->outf);
205 print->lineno++;
45b966db 206 }
f2d5f0cc 207 return;
45b966db 208 }
f2d5f0cc
ZW
209
210#ifndef NO_IMPLICIT_EXTERN_C
211 if (CPP_OPTION (pfile, cplusplus))
212 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
213 codes[change],
214 ip->system_header_p ? " 3" : "",
215 (ip->system_header_p == 2) ? " 4" : "");
216 else
217#endif
218 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
219 codes[change],
220 ip->system_header_p ? " 3" : "");
221 print->lineno = line;
222}
223
224/* Write the contents of the token_buffer to the output stream, and
225 clear the token_buffer. Also handles generating line commands and
226 keeping track of file transitions. */
227
228void
229cpp_output_tokens (pfile, print)
230 cpp_reader *pfile;
231 cpp_printer *print;
232{
1368ee70
ZW
233 cpp_buffer *ip;
234
f6fab919
ZW
235 if (CPP_WRITTEN (pfile) - print->written)
236 {
237 if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
238 print->lineno++;
239 safe_fwrite (pfile, pfile->token_buffer,
240 CPP_WRITTEN (pfile) - print->written, print->outf);
241 }
1368ee70
ZW
242
243 ip = cpp_file_buffer (pfile);
244 if (ip)
245 output_line_command (pfile, print, CPP_BUF_LINE (ip));
246
f2d5f0cc 247 CPP_SET_WRITTEN (pfile, print->written);
45b966db
ZW
248}
249
1368ee70
ZW
250/* Helper for cpp_output_list - increases the column number to match
251 what we expect it to be. */
252
253static void
254bump_column (print, from, to)
255 cpp_printer *print;
256 unsigned int from, to;
257{
258 unsigned int tabs, spcs;
259 unsigned int delta = to - from;
260
261 /* Only if FROM is 0, advance by tabs. */
262 if (from == 0)
263 tabs = delta / 8, spcs = delta % 8;
264 else
265 tabs = 0, spcs = delta;
266
267 while (tabs--) putc ('\t', print->outf);
268 while (spcs--) putc (' ', print->outf);
269}
270
271/* Write out the list L onto pfile->token_buffer. This function is
272 incomplete:
273
274 1) pfile->token_buffer is not going to continue to exist.
275 2) At the moment, tokens don't carry the information described
276 in cpplib.h; they are all strings.
277 3) The list has to be a complete line, and has to be written starting
278 at the beginning of a line. */
279
280void
281cpp_output_list (pfile, print, list)
282 cpp_reader *pfile;
283 cpp_printer *print;
284 const cpp_toklist *list;
285{
286 unsigned int i;
287 unsigned int curcol = 1;
288
289 /* XXX Probably does not do what is intended. */
290 if (print->lineno != list->line)
291 output_line_command (pfile, print, list->line);
292
293 for (i = 0; i < list->tokens_used; i++)
294 {
295 if (list->tokens[i].type == CPP_VSPACE)
296 {
297 output_line_command (pfile, print, list->tokens[i].aux);
298 continue;
299 }
300
301 if (curcol < list->tokens[i].col)
302 {
303 /* Insert space to bring the column to what it should be. */
304 bump_column (print, curcol - 1, list->tokens[i].col);
305 curcol = list->tokens[i].col;
306 }
307 /* XXX We may have to insert space to prevent an accidental
308 token paste. */
309 safe_fwrite (pfile, list->namebuf + list->tokens[i].val.name.offset,
310 list->tokens[i].val.name.len, print->outf);
311 curcol += list->tokens[i].val.name.len;
312 }
313}
314
f2d5f0cc
ZW
315/* Scan a string (which may have escape marks), perform macro expansion,
316 and write the result to the token_buffer. */
45b966db
ZW
317
318void
f2d5f0cc 319_cpp_expand_to_buffer (pfile, buf, length)
45b966db
ZW
320 cpp_reader *pfile;
321 const U_CHAR *buf;
322 int length;
323{
f2d5f0cc
ZW
324 cpp_buffer *ip;
325 enum cpp_ttype token;
f6fab919 326 U_CHAR *buf1;
45b966db
ZW
327
328 if (length < 0)
329 {
330 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
331 return;
332 }
333
f6fab919
ZW
334 /* Copy the buffer, because it might be in an unsafe place - for
335 example, a sequence on the token_buffer, where the pointers will
336 be invalidated if we enlarge the token_buffer. */
337 buf1 = alloca (length);
338 memcpy (buf1, buf, length);
339
45b966db 340 /* Set up the input on the input stack. */
f6fab919 341 ip = cpp_push_buffer (pfile, buf1, length);
45b966db
ZW
342 if (ip == NULL)
343 return;
344 ip->has_escapes = 1;
345
346 /* Scan the input, create the output. */
f2d5f0cc
ZW
347 for (;;)
348 {
349 token = cpp_get_token (pfile);
350 if (token == CPP_EOF)
351 break;
352 if (token == CPP_POP && CPP_BUFFER (pfile) == ip)
353 {
354 cpp_pop_buffer (pfile);
355 break;
356 }
357 }
45b966db
ZW
358}
359
f2d5f0cc
ZW
360/* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.
361 Then pop the buffer. */
362
363void
364cpp_scan_buffer_nooutput (pfile)
365 cpp_reader *pfile;
366{
367 cpp_buffer *buffer = CPP_BUFFER (pfile);
368 enum cpp_ttype token;
369 unsigned int old_written = CPP_WRITTEN (pfile);
370 /* In no-output mode, we can ignore everything but directives. */
371 for (;;)
372 {
373 if (! pfile->only_seen_white)
374 _cpp_skip_rest_of_line (pfile);
375 token = cpp_get_token (pfile);
376 if (token == CPP_EOF)
377 break;
378 if (token == CPP_POP && CPP_BUFFER (pfile) == buffer)
379 {
380 cpp_pop_buffer (pfile);
381 break;
382 }
383 }
384 CPP_SET_WRITTEN (pfile, old_written);
385}
386
387/* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.
388 Then pop the buffer. */
389
390void
391cpp_scan_buffer (pfile, print)
392 cpp_reader *pfile;
393 cpp_printer *print;
394{
395 cpp_buffer *buffer = CPP_BUFFER (pfile);
396 enum cpp_ttype token;
397
398 for (;;)
399 {
400 token = cpp_get_token (pfile);
401 if ((token == CPP_POP && !CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
402 || token == CPP_EOF || token == CPP_VSPACE
403 /* XXX Temporary kluge - force flush after #include only */
404 || (token == CPP_DIRECTIVE
405 && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
406 {
407 cpp_output_tokens (pfile, print);
408 if (token == CPP_EOF)
409 return;
410 if (token == CPP_POP && CPP_BUFFER (pfile) == buffer)
411 {
412 cpp_pop_buffer (pfile);
413 return;
414 }
415 }
416 }
417}
418
45b966db
ZW
419/* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
420
421cpp_buffer *
422cpp_file_buffer (pfile)
423 cpp_reader *pfile;
424{
425 cpp_buffer *ip;
426
427 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
428 if (ip->ihash != NULL)
429 return ip;
430 return NULL;
431}
432
1368ee70
ZW
433/* Token-buffer helper functions. */
434
435/* Expand a token list's string space. */
436static void
c5a04734 437expand_name_space (list, len)
1368ee70 438 cpp_toklist *list;
c5a04734
ZW
439 unsigned int len;
440{
441 list->name_cap += len;
442 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
1368ee70
ZW
443}
444
445/* Expand the number of tokens in a list. */
446static void
447expand_token_space (list)
448 cpp_toklist *list;
449{
450 list->tokens_cap *= 2;
451 list->tokens = (cpp_token *)
c5a04734
ZW
452 xrealloc (list->tokens - 1, (list->tokens_cap + 1) * sizeof (cpp_token));
453 list->tokens++; /* Skip the dummy. */
1368ee70
ZW
454}
455
c5a04734
ZW
456/* Initialize a token list. We allocate an extra token in front of
457 the token list, as this allows us to always peek at the previous
458 token without worrying about underflowing the list. */
1368ee70
ZW
459static void
460init_token_list (pfile, list, recycle)
461 cpp_reader *pfile;
462 cpp_toklist *list;
463 int recycle;
464{
c5a04734
ZW
465 /* Recycling a used list saves 3 free-malloc pairs. */
466 if (!recycle)
1368ee70 467 {
c5a04734
ZW
468 /* Initialize token space. Put a dummy token before the start
469 that will fail matches. */
470 list->tokens_cap = 256; /* 4K's worth. */
1368ee70 471 list->tokens = (cpp_token *)
c5a04734
ZW
472 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
473 list->tokens[0].type = CPP_EOF;
474 list->tokens++;
1368ee70 475
c5a04734 476 /* Initialize name space. */
1368ee70 477 list->name_cap = 1024;
1368ee70 478 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
c5a04734
ZW
479
480 /* Only create a comment space on demand. */
481 list->comments_cap = 0;
482 list->comments = 0;
1368ee70
ZW
483 }
484
c5a04734
ZW
485 list->tokens_used = 0;
486 list->name_used = 0;
487 list->comments_used = 0;
9e62c811
ZW
488 if (pfile->buffer)
489 list->line = pfile->buffer->lineno;
1368ee70
ZW
490 list->dir_handler = 0;
491 list->dir_flags = 0;
492}
493
494/* Scan an entire line and create a token list for it. Does not
495 macro-expand or execute directives. */
496
497void
498_cpp_scan_line (pfile, list)
499 cpp_reader *pfile;
500 cpp_toklist *list;
501{
502 int i, col;
503 long written, len;
504 enum cpp_ttype type;
9e62c811 505 int space_before;
1368ee70
ZW
506
507 init_token_list (pfile, list, 1);
508
509 written = CPP_WRITTEN (pfile);
510 i = 0;
9e62c811 511 space_before = 0;
1368ee70
ZW
512 for (;;)
513 {
514 col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
515 type = _cpp_lex_token (pfile);
516 len = CPP_WRITTEN (pfile) - written;
517 CPP_SET_WRITTEN (pfile, written);
518 if (type == CPP_HSPACE)
9e62c811
ZW
519 {
520 if (CPP_PEDANTIC (pfile))
521 pedantic_whitespace (pfile, pfile->token_buffer + written, len);
522 space_before = 1;
523 continue;
524 }
0f89df67
ZW
525 else if (type == CPP_COMMENT)
526 /* Only happens when processing -traditional macro definitions.
527 Do not give this a token entry, but do not change space_before
528 either. */
529 continue;
1368ee70
ZW
530
531 if (list->tokens_used >= list->tokens_cap)
532 expand_token_space (list);
533 if (list->name_used + len >= list->name_cap)
bb1ec1d7 534 expand_name_space (list, list->name_used + len + 1 - list->name_cap);
1368ee70 535
9e62c811
ZW
536 if (type == CPP_MACRO)
537 type = CPP_NAME;
538
1368ee70
ZW
539 list->tokens_used++;
540 list->tokens[i].type = type;
541 list->tokens[i].col = col;
c5a04734 542 list->tokens[i].flags = space_before ? PREV_WHITESPACE : 0;
9e62c811 543
1368ee70
ZW
544 if (type == CPP_VSPACE)
545 break;
546
547 list->tokens[i].val.name.len = len;
548 list->tokens[i].val.name.offset = list->name_used;
549 memcpy (list->namebuf + list->name_used, CPP_PWRITTEN (pfile), len);
550 list->name_used += len;
551 i++;
9e62c811 552 space_before = 0;
1368ee70
ZW
553 }
554 list->tokens[i].aux = CPP_BUFFER (pfile)->lineno + 1;
9e62c811
ZW
555
556 /* XXX Temporary kluge: put back the newline. */
557 FORWARD(-1);
1368ee70
ZW
558}
559
560
45b966db
ZW
561/* Skip a C-style block comment. We know it's a comment, and point is
562 at the second character of the starter. */
563static void
564skip_block_comment (pfile)
565 cpp_reader *pfile;
566{
3a2b2c7a 567 unsigned int line, col;
61474454 568 const U_CHAR *limit, *cur;
45b966db
ZW
569
570 FORWARD(1);
3a2b2c7a
ZW
571 line = CPP_BUF_LINE (CPP_BUFFER (pfile));
572 col = CPP_BUF_COL (CPP_BUFFER (pfile));
61474454
NB
573 limit = CPP_BUFFER (pfile)->rlimit;
574 cur = CPP_BUFFER (pfile)->cur;
575
576 while (cur < limit)
45b966db 577 {
61474454
NB
578 char c = *cur++;
579 if (c == '\n' || c == '\r')
45b966db
ZW
580 {
581 /* \r cannot be a macro escape marker here. */
582 if (!ACTIVE_MARK_P (pfile))
61474454
NB
583 CPP_BUMP_LINE_CUR (pfile, cur);
584 }
585 else if (c == '*')
586 {
587 /* Check for teminator. */
588 if (cur < limit && *cur == '/')
589 goto out;
590
591 /* Warn about comment starter embedded in comment. */
592 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
593 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
594 cur - CPP_BUFFER (pfile)->line_base,
595 "'/*' within comment");
45b966db 596 }
45b966db 597 }
61474454
NB
598
599 cpp_error_with_line (pfile, line, col, "unterminated comment");
600 cur--;
601 out:
602 CPP_BUFFER (pfile)->cur = cur + 1;
45b966db
ZW
603}
604
605/* Skip a C++/Chill line comment. We know it's a comment, and point
606 is at the second character of the initiator. */
607static void
608skip_line_comment (pfile)
609 cpp_reader *pfile;
610{
611 FORWARD(1);
612 for (;;)
613 {
614 int c = GETC ();
615
616 /* We don't have to worry about EOF in here. */
617 if (c == '\n')
618 {
619 /* Don't consider final '\n' to be part of comment. */
620 FORWARD(-1);
621 return;
622 }
623 else if (c == '\r')
624 {
625 /* \r cannot be a macro escape marker here. */
626 if (!ACTIVE_MARK_P (pfile))
627 CPP_BUMP_LINE (pfile);
ae79697b 628 if (CPP_OPTION (pfile, warn_comments))
45b966db
ZW
629 cpp_warning (pfile, "backslash-newline within line comment");
630 }
631 }
632}
633
634/* Skip a comment - C, C++, or Chill style. M is the first character
635 of the comment marker. If this really is a comment, skip to its
636 end and return ' '. If this is not a comment, return M (which will
637 be '/' or '-'). */
638
639static int
640skip_comment (pfile, m)
641 cpp_reader *pfile;
642 int m;
643{
644 if (m == '/' && PEEKC() == '*')
645 {
646 skip_block_comment (pfile);
647 return ' ';
648 }
649 else if (m == '/' && PEEKC() == '/')
650 {
651 if (CPP_BUFFER (pfile)->system_header_p)
652 {
653 /* We silently allow C++ comments in system headers, irrespective
654 of conformance mode, because lots of busted systems do that
655 and trying to clean it up in fixincludes is a nightmare. */
656 skip_line_comment (pfile);
657 return ' ';
658 }
ae79697b 659 else if (CPP_OPTION (pfile, cplusplus_comments))
45b966db 660 {
0f89df67 661 if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
45b966db 662 {
0f89df67
ZW
663 if (CPP_WTRADITIONAL (pfile))
664 cpp_pedwarn (pfile,
665 "C++ style comments are not allowed in traditional C");
666 else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
667 cpp_pedwarn (pfile,
668 "C++ style comments are not allowed in ISO C89");
669 if (CPP_WTRADITIONAL (pfile)
670 || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
671 cpp_pedwarn (pfile,
45b966db
ZW
672 "(this will be reported only once per input file)");
673 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
674 }
675 skip_line_comment (pfile);
676 return ' ';
677 }
678 else
679 return m;
680 }
681 else if (m == '-' && PEEKC() == '-'
ae79697b 682 && CPP_OPTION (pfile, chill))
45b966db
ZW
683 {
684 skip_line_comment (pfile);
685 return ' ';
686 }
687 else
688 return m;
689}
690
691/* Identical to skip_comment except that it copies the comment into the
692 token_buffer. This is used if !discard_comments. */
693static int
694copy_comment (pfile, m)
695 cpp_reader *pfile;
696 int m;
697{
698 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
699 const U_CHAR *limit;
700
701 if (skip_comment (pfile, m) == m)
702 return m;
703
704 limit = CPP_BUFFER (pfile)->cur;
705 CPP_RESERVE (pfile, limit - start + 2);
706 CPP_PUTC_Q (pfile, m);
707 for (; start <= limit; start++)
708 if (*start != '\r')
709 CPP_PUTC_Q (pfile, *start);
710
711 return ' ';
712}
713
64aaf407
NB
714static void
715null_warning (pfile, count)
716 cpp_reader *pfile;
717 unsigned int count;
718{
719 if (count == 1)
720 cpp_warning (pfile, "embedded null character ignored");
721 else
722 cpp_warning (pfile, "embedded null characters ignored");
723}
724
45b966db
ZW
725/* Skip whitespace \-newline and comments. Does not macro-expand. */
726
727void
728_cpp_skip_hspace (pfile)
729 cpp_reader *pfile;
730{
64aaf407 731 unsigned int null_count = 0;
45b966db 732 int c;
64aaf407 733
45b966db
ZW
734 while (1)
735 {
736 c = GETC();
737 if (c == EOF)
64aaf407 738 goto out;
45b966db
ZW
739 else if (is_hspace(c))
740 {
741 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
742 cpp_pedwarn (pfile, "%s in preprocessing directive",
743 c == '\f' ? "formfeed" : "vertical tab");
64aaf407
NB
744 else if (c == '\0')
745 null_count++;
45b966db
ZW
746 }
747 else if (c == '\r')
748 {
749 /* \r is a backslash-newline marker if !has_escapes, and
750 a deletable-whitespace or no-reexpansion marker otherwise. */
751 if (CPP_BUFFER (pfile)->has_escapes)
752 {
753 if (PEEKC() == ' ')
754 FORWARD(1);
755 else
756 break;
757 }
758 else
759 CPP_BUMP_LINE (pfile);
760 }
761 else if (c == '/' || c == '-')
762 {
763 c = skip_comment (pfile, c);
764 if (c != ' ')
765 break;
766 }
767 else
768 break;
769 }
770 FORWARD(-1);
64aaf407
NB
771 out:
772 if (null_count)
773 null_warning (pfile, null_count);
45b966db
ZW
774}
775
776/* Read and discard the rest of the current line. */
777
778void
779_cpp_skip_rest_of_line (pfile)
780 cpp_reader *pfile;
781{
782 for (;;)
783 {
784 int c = GETC();
785 switch (c)
786 {
787 case '\n':
788 FORWARD(-1);
789 case EOF:
790 return;
791
792 case '\r':
793 if (! CPP_BUFFER (pfile)->has_escapes)
794 CPP_BUMP_LINE (pfile);
795 break;
796
797 case '\'':
798 case '\"':
799 skip_string (pfile, c);
800 break;
801
802 case '/':
803 case '-':
804 skip_comment (pfile, c);
805 break;
806
807 case '\f':
808 case '\v':
809 if (CPP_PEDANTIC (pfile))
810 cpp_pedwarn (pfile, "%s in preprocessing directive",
811 c == '\f' ? "formfeed" : "vertical tab");
812 break;
813
814 }
815 }
816}
817
818/* Parse an identifier starting with C. */
819
820void
821_cpp_parse_name (pfile, c)
822 cpp_reader *pfile;
823 int c;
824{
825 for (;;)
826 {
827 if (! is_idchar(c))
828 {
829 FORWARD (-1);
830 break;
831 }
832
833 if (c == '$' && CPP_PEDANTIC (pfile))
834 cpp_pedwarn (pfile, "`$' in identifier");
835
836 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
837 CPP_PUTC_Q (pfile, c);
838 c = GETC();
839 if (c == EOF)
840 break;
841 }
45b966db
ZW
842 return;
843}
844
845/* Parse and skip over a string starting with C. A single quoted
846 string is treated like a double -- some programs (e.g., troff) are
847 perverse this way. (However, a single quoted string is not allowed
848 to extend over multiple lines.) */
849static void
850skip_string (pfile, c)
851 cpp_reader *pfile;
852 int c;
853{
3a2b2c7a 854 unsigned int start_line, start_column;
64aaf407 855 unsigned int null_count = 0;
45b966db 856
3a2b2c7a
ZW
857 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
858 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
45b966db
ZW
859 while (1)
860 {
861 int cc = GETC();
862 switch (cc)
863 {
864 case EOF:
865 cpp_error_with_line (pfile, start_line, start_column,
866 "unterminated string or character constant");
867 if (pfile->multiline_string_line != start_line
868 && pfile->multiline_string_line != 0)
869 cpp_error_with_line (pfile,
870 pfile->multiline_string_line, -1,
871 "possible real start of unterminated constant");
872 pfile->multiline_string_line = 0;
64aaf407 873 goto out;
45b966db 874
64aaf407
NB
875 case '\0':
876 null_count++;
877 break;
878
45b966db
ZW
879 case '\n':
880 CPP_BUMP_LINE (pfile);
881 /* In Fortran and assembly language, silently terminate
882 strings of either variety at end of line. This is a
883 kludge around not knowing where comments are in these
884 languages. */
ae79697b
ZW
885 if (CPP_OPTION (pfile, lang_fortran)
886 || CPP_OPTION (pfile, lang_asm))
45b966db
ZW
887 {
888 FORWARD(-1);
64aaf407 889 goto out;
45b966db
ZW
890 }
891 /* Character constants may not extend over multiple lines.
892 In Standard C, neither may strings. We accept multiline
893 strings as an extension. */
894 if (c == '\'')
895 {
896 cpp_error_with_line (pfile, start_line, start_column,
897 "unterminated character constant");
898 FORWARD(-1);
64aaf407 899 goto out;
45b966db
ZW
900 }
901 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
902 cpp_pedwarn_with_line (pfile, start_line, start_column,
903 "string constant runs past end of line");
904 if (pfile->multiline_string_line == 0)
905 pfile->multiline_string_line = start_line;
906 break;
907
908 case '\r':
909 if (CPP_BUFFER (pfile)->has_escapes)
910 {
911 cpp_ice (pfile, "\\r escape inside string constant");
912 FORWARD(1);
913 }
914 else
915 /* Backslash newline is replaced by nothing at all. */
916 CPP_BUMP_LINE (pfile);
917 break;
918
919 case '\\':
920 FORWARD(1);
921 break;
922
923 case '\"':
924 case '\'':
925 if (cc == c)
64aaf407 926 goto out;
45b966db
ZW
927 break;
928 }
929 }
64aaf407
NB
930
931 out:
932 if (null_count == 1)
933 cpp_warning (pfile, "null character in string or character constant");
934 else if (null_count > 1)
935 cpp_warning (pfile, "null characters in string or character constant");
45b966db
ZW
936}
937
938/* Parse a string and copy it to the output. */
939
940static void
941parse_string (pfile, c)
942 cpp_reader *pfile;
943 int c;
944{
945 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
946 const U_CHAR *limit;
947
948 skip_string (pfile, c);
949
950 limit = CPP_BUFFER (pfile)->cur;
951 CPP_RESERVE (pfile, limit - start + 2);
952 CPP_PUTC_Q (pfile, c);
953 for (; start < limit; start++)
954 if (*start != '\r')
955 CPP_PUTC_Q (pfile, *start);
956}
957
958/* Read an assertion into the token buffer, converting to
959 canonical form: `#predicate(a n swe r)' The next non-whitespace
960 character to read should be the first letter of the predicate.
961 Returns 0 for syntax error, 1 for bare predicate, 2 for predicate
962 with answer (see callers for why). In case of 0, an error has been
963 printed. */
964int
965_cpp_parse_assertion (pfile)
966 cpp_reader *pfile;
967{
968 int c, dropwhite;
969 _cpp_skip_hspace (pfile);
970 c = PEEKC();
bfab56e7
ZW
971 if (c == '\n')
972 {
973 cpp_error (pfile, "assertion without predicate");
974 return 0;
975 }
976 else if (! is_idstart(c))
45b966db
ZW
977 {
978 cpp_error (pfile, "assertion predicate is not an identifier");
979 return 0;
980 }
981 CPP_PUTC(pfile, '#');
982 FORWARD(1);
983 _cpp_parse_name (pfile, c);
984
985 c = PEEKC();
986 if (c != '(')
987 {
988 if (is_hspace(c) || c == '\r')
989 _cpp_skip_hspace (pfile);
990 c = PEEKC();
991 }
992 if (c != '(')
993 return 1;
994
995 CPP_PUTC(pfile, '(');
996 FORWARD(1);
997 dropwhite = 1;
998 while ((c = GETC()) != ')')
999 {
1000 if (is_space(c))
1001 {
1002 if (! dropwhite)
1003 {
1004 CPP_PUTC(pfile, ' ');
1005 dropwhite = 1;
1006 }
1007 }
1008 else if (c == '\n' || c == EOF)
1009 {
1010 if (c == '\n') FORWARD(-1);
1011 cpp_error (pfile, "un-terminated assertion answer");
1012 return 0;
1013 }
1014 else if (c == '\r')
1015 /* \r cannot be a macro escape here. */
1016 CPP_BUMP_LINE (pfile);
1017 else
1018 {
1019 CPP_PUTC (pfile, c);
1020 dropwhite = 0;
1021 }
1022 }
1023
1024 if (pfile->limit[-1] == ' ')
1025 pfile->limit[-1] = ')';
1026 else if (pfile->limit[-1] == '(')
1027 {
1028 cpp_error (pfile, "empty token sequence in assertion");
1029 return 0;
1030 }
1031 else
1032 CPP_PUTC (pfile, ')');
1033
45b966db
ZW
1034 return 2;
1035}
1036
1037/* Get the next token, and add it to the text in pfile->token_buffer.
1038 Return the kind of token we got. */
1039
3a2b2c7a 1040enum cpp_ttype
45b966db
ZW
1041_cpp_lex_token (pfile)
1042 cpp_reader *pfile;
1043{
5eec0563 1044 register int c, c2;
3a2b2c7a 1045 enum cpp_ttype token;
45b966db 1046
f2d5f0cc
ZW
1047 if (CPP_BUFFER (pfile) == NULL)
1048 return CPP_EOF;
1049
45b966db
ZW
1050 get_next:
1051 c = GETC();
1052 switch (c)
1053 {
1054 case EOF:
1055 return CPP_EOF;
1056
1057 case '/':
1058 if (PEEKC () == '=')
1059 goto op2;
1060
1061 comment:
ae79697b 1062 if (CPP_OPTION (pfile, discard_comments))
45b966db
ZW
1063 c = skip_comment (pfile, c);
1064 else
1065 c = copy_comment (pfile, c);
1066 if (c != ' ')
1067 goto randomchar;
1068
1069 /* Comments are equivalent to spaces.
1070 For -traditional, a comment is equivalent to nothing. */
ff2b53ef 1071 if (!CPP_OPTION (pfile, discard_comments))
45b966db 1072 return CPP_COMMENT;
9e62c811 1073 else if (CPP_TRADITIONAL (pfile))
0f89df67
ZW
1074 {
1075 if (pfile->parsing_define_directive)
1076 return CPP_COMMENT;
1077 goto get_next;
1078 }
45b966db
ZW
1079 else
1080 {
1081 CPP_PUTC (pfile, c);
1082 return CPP_HSPACE;
1083 }
1084
1085 case '#':
5eec0563
JM
1086 CPP_PUTC (pfile, c);
1087
1088 hash:
45b966db
ZW
1089 if (pfile->parsing_if_directive)
1090 {
f2d5f0cc 1091 CPP_ADJUST_WRITTEN (pfile, -1);
bfab56e7
ZW
1092 if (_cpp_parse_assertion (pfile))
1093 return CPP_ASSERTION;
5eec0563 1094 return CPP_OTHER;
45b966db
ZW
1095 }
1096
9e62c811 1097 if (pfile->parsing_define_directive)
45b966db 1098 {
5eec0563
JM
1099 c2 = PEEKC ();
1100 if (c2 == '#')
1101 {
1102 FORWARD (1);
1103 CPP_PUTC (pfile, c2);
1104 }
1105 else if (c2 == '%' && PEEKN (1) == ':')
1106 {
1107 /* Digraph: "%:" == "#". */
1108 FORWARD (1);
1109 CPP_RESERVE (pfile, 2);
1110 CPP_PUTC_Q (pfile, c2);
1111 CPP_PUTC_Q (pfile, GETC ());
1112 }
1113 else
1368ee70 1114 return CPP_HASH;
5eec0563 1115
1368ee70 1116 return CPP_PASTE;
45b966db
ZW
1117 }
1118
1119 if (!pfile->only_seen_white)
5eec0563
JM
1120 return CPP_OTHER;
1121
1122 /* Remove the "#" or "%:" from the token buffer. */
1123 CPP_ADJUST_WRITTEN (pfile, (c == '#' ? -1 : -2));
45b966db
ZW
1124 return CPP_DIRECTIVE;
1125
1126 case '\"':
1127 case '\'':
1128 parse_string (pfile, c);
45b966db
ZW
1129 return c == '\'' ? CPP_CHAR : CPP_STRING;
1130
1131 case '$':
ae79697b 1132 if (!CPP_OPTION (pfile, dollars_in_ident))
45b966db
ZW
1133 goto randomchar;
1134 goto letter;
1135
1136 case ':':
5eec0563
JM
1137 c2 = PEEKC ();
1138 /* Digraph: ":>" == "]". */
1139 if (c2 == '>'
1140 || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
45b966db
ZW
1141 goto op2;
1142 goto randomchar;
1143
1144 case '&':
1145 case '+':
1146 case '|':
1147 c2 = PEEKC ();
1148 if (c2 == c || c2 == '=')
1149 goto op2;
1150 goto randomchar;
1151
5eec0563
JM
1152 case '%':
1153 /* Digraphs: "%:" == "#", "%>" == "}". */
1154 c2 = PEEKC ();
1155 if (c2 == ':')
1156 {
1157 FORWARD (1);
1158 CPP_RESERVE (pfile, 2);
1159 CPP_PUTC_Q (pfile, c);
1160 CPP_PUTC_Q (pfile, c2);
1161 goto hash;
1162 }
1163 else if (c2 == '>')
1164 {
1165 FORWARD (1);
1166 CPP_RESERVE (pfile, 2);
1167 CPP_PUTC_Q (pfile, c);
1168 CPP_PUTC_Q (pfile, c2);
1368ee70 1169 return CPP_OPEN_BRACE;
5eec0563
JM
1170 }
1171 /* else fall through */
1172
45b966db
ZW
1173 case '*':
1174 case '!':
45b966db
ZW
1175 case '=':
1176 case '^':
1177 if (PEEKC () == '=')
1178 goto op2;
1179 goto randomchar;
1180
1181 case '-':
1182 c2 = PEEKC ();
1183 if (c2 == '-')
1184 {
ae79697b 1185 if (CPP_OPTION (pfile, chill))
45b966db
ZW
1186 goto comment; /* Chill style comment */
1187 else
1188 goto op2;
1189 }
1190 else if (c2 == '=')
1191 goto op2;
1192 else if (c2 == '>')
1193 {
ae79697b 1194 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
45b966db
ZW
1195 {
1196 /* In C++, there's a ->* operator. */
1197 token = CPP_OTHER;
45b966db
ZW
1198 CPP_RESERVE (pfile, 4);
1199 CPP_PUTC_Q (pfile, c);
1200 CPP_PUTC_Q (pfile, GETC ());
1201 CPP_PUTC_Q (pfile, GETC ());
45b966db
ZW
1202 return token;
1203 }
1204 goto op2;
1205 }
1206 goto randomchar;
1207
1208 case '<':
1209 if (pfile->parsing_include_directive)
1210 {
1211 for (;;)
1212 {
1213 CPP_PUTC (pfile, c);
1214 if (c == '>')
1215 break;
1216 c = GETC ();
1217 if (c == '\n' || c == EOF)
1218 {
1219 cpp_error (pfile,
1220 "missing '>' in `#include <FILENAME>'");
1221 break;
1222 }
1223 else if (c == '\r')
1224 {
1225 if (!CPP_BUFFER (pfile)->has_escapes)
1226 {
1227 /* Backslash newline is replaced by nothing. */
1228 CPP_ADJUST_WRITTEN (pfile, -1);
1229 CPP_BUMP_LINE (pfile);
1230 }
1231 else
1232 {
1233 /* We might conceivably get \r- or \r<space> in
1234 here. Just delete 'em. */
1235 int d = GETC();
1236 if (d != '-' && d != ' ')
1237 cpp_ice (pfile, "unrecognized escape \\r%c", d);
1238 CPP_ADJUST_WRITTEN (pfile, -1);
1239 }
1240 }
1241 }
1242 return CPP_STRING;
1243 }
5eec0563
JM
1244 /* Digraphs: "<%" == "{", "<:" == "[". */
1245 c2 = PEEKC ();
1246 if (c2 == '%')
1247 {
1248 FORWARD (1);
1249 CPP_RESERVE (pfile, 2);
1250 CPP_PUTC_Q (pfile, c);
1251 CPP_PUTC_Q (pfile, c2);
1368ee70 1252 return CPP_CLOSE_BRACE;
5eec0563
JM
1253 }
1254 else if (c2 == ':')
1255 goto op2;
45b966db
ZW
1256 /* else fall through */
1257 case '>':
1258 c2 = PEEKC ();
1259 if (c2 == '=')
1260 goto op2;
1261 /* GNU C++ supports MIN and MAX operators <? and >?. */
ae79697b 1262 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
45b966db
ZW
1263 goto randomchar;
1264 FORWARD(1);
5eec0563
JM
1265 CPP_RESERVE (pfile, 3);
1266 CPP_PUTC_Q (pfile, c);
1267 CPP_PUTC_Q (pfile, c2);
1268 if (PEEKC () == '=')
45b966db 1269 CPP_PUTC_Q (pfile, GETC ());
45b966db
ZW
1270 return CPP_OTHER;
1271
1272 case '.':
1273 c2 = PEEKC ();
5eec0563 1274 if (ISDIGIT (c2))
45b966db 1275 {
5eec0563 1276 CPP_PUTC (pfile, c);
45b966db
ZW
1277 c = GETC ();
1278 goto number;
1279 }
1280
1281 /* In C++ there's a .* operator. */
ae79697b 1282 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
45b966db
ZW
1283 goto op2;
1284
1285 if (c2 == '.' && PEEKN(1) == '.')
1286 {
5eec0563 1287 CPP_RESERVE (pfile, 3);
45b966db
ZW
1288 CPP_PUTC_Q (pfile, '.');
1289 CPP_PUTC_Q (pfile, '.');
1290 CPP_PUTC_Q (pfile, '.');
1291 FORWARD (2);
1368ee70 1292 return CPP_ELLIPSIS;
45b966db
ZW
1293 }
1294 goto randomchar;
1295
1296 op2:
5eec0563 1297 CPP_RESERVE (pfile, 2);
45b966db
ZW
1298 CPP_PUTC_Q (pfile, c);
1299 CPP_PUTC_Q (pfile, GETC ());
5eec0563 1300 return CPP_OTHER;
45b966db
ZW
1301
1302 case 'L':
1303 c2 = PEEKC ();
1304 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1305 {
1306 CPP_PUTC (pfile, c);
1307 c = GETC ();
1308 parse_string (pfile, c);
45b966db
ZW
1309 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1310 }
1311 goto letter;
1312
1313 case '0': case '1': case '2': case '3': case '4':
1314 case '5': case '6': case '7': case '8': case '9':
1315 number:
1316 c2 = '.';
1317 for (;;)
1318 {
1319 CPP_RESERVE (pfile, 2);
1320 CPP_PUTC_Q (pfile, c);
1321 c = PEEKC ();
1322 if (c == EOF)
1323 break;
1324 if (!is_numchar(c) && c != '.'
1325 && ((c2 != 'e' && c2 != 'E'
1326 && ((c2 != 'p' && c2 != 'P')
ae79697b 1327 || CPP_OPTION (pfile, c89)))
45b966db
ZW
1328 || (c != '+' && c != '-')))
1329 break;
1330 FORWARD(1);
1331 c2= c;
1332 }
45b966db
ZW
1333 return CPP_NUMBER;
1334 case 'b': case 'c': case 'd': case 'h': case 'o':
1335 case 'B': case 'C': case 'D': case 'H': case 'O':
ae79697b 1336 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
45b966db 1337 {
45b966db
ZW
1338 CPP_RESERVE (pfile, 2);
1339 CPP_PUTC_Q (pfile, c);
1340 CPP_PUTC_Q (pfile, '\'');
1341 FORWARD(1);
1342 for (;;)
1343 {
1344 c = GETC();
1345 if (c == EOF)
1346 goto chill_number_eof;
1347 if (!is_numchar(c))
1348 break;
1349 CPP_PUTC (pfile, c);
1350 }
1351 if (c == '\'')
1352 {
1353 CPP_RESERVE (pfile, 2);
1354 CPP_PUTC_Q (pfile, c);
45b966db
ZW
1355 return CPP_STRING;
1356 }
1357 else
1358 {
1359 FORWARD(-1);
1360 chill_number_eof:
45b966db
ZW
1361 return CPP_NUMBER;
1362 }
1363 }
1364 else
1365 goto letter;
1366 case '_':
1367 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1368 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1369 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1370 case 'x': case 'y': case 'z':
1371 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1372 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1373 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1374 case 'Y': case 'Z':
1375 letter:
45b966db
ZW
1376 _cpp_parse_name (pfile, c);
1377 return CPP_MACRO;
1378
64aaf407
NB
1379 case ' ': case '\t': case '\v': case '\f': case '\0':
1380 {
1381 int null_count = 0;
1382
1383 for (;;)
1384 {
1385 if (c == '\0')
1386 null_count++;
1387 else
1388 CPP_PUTC (pfile, c);
1389 c = PEEKC ();
1390 if (c == EOF || !is_hspace(c))
1391 break;
1392 FORWARD(1);
1393 }
1394 if (null_count)
1395 null_warning (pfile, null_count);
1396 return CPP_HSPACE;
1397 }
45b966db
ZW
1398
1399 case '\r':
1400 if (CPP_BUFFER (pfile)->has_escapes)
1401 {
1402 c = GETC ();
1403 if (c == '-')
1404 {
1405 if (pfile->output_escapes)
1406 CPP_PUTS (pfile, "\r-", 2);
1407 _cpp_parse_name (pfile, GETC ());
1408 return CPP_NAME;
1409 }
1410 else if (c == ' ')
1411 {
ff2b53ef
ZW
1412 /* "\r " means a space, but only if necessary to prevent
1413 accidental token concatenation. */
45b966db
ZW
1414 CPP_RESERVE (pfile, 2);
1415 if (pfile->output_escapes)
1416 CPP_PUTC_Q (pfile, '\r');
1417 CPP_PUTC_Q (pfile, c);
1418 return CPP_HSPACE;
1419 }
1420 else
1421 {
1422 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1423 goto get_next;
1424 }
1425 }
1426 else
1427 {
1428 /* Backslash newline is ignored. */
cbccf5e8
MM
1429 if (!ACTIVE_MARK_P (pfile))
1430 CPP_BUMP_LINE (pfile);
45b966db
ZW
1431 goto get_next;
1432 }
1433
1434 case '\n':
1435 CPP_PUTC (pfile, c);
45b966db
ZW
1436 return CPP_VSPACE;
1437
1368ee70
ZW
1438 case '(': token = CPP_OPEN_PAREN; goto char1;
1439 case ')': token = CPP_CLOSE_PAREN; goto char1;
1440 case '{': token = CPP_OPEN_BRACE; goto char1;
1441 case '}': token = CPP_CLOSE_BRACE; goto char1;
1442 case ',': token = CPP_COMMA; goto char1;
1443 case ';': token = CPP_SEMICOLON; goto char1;
45b966db
ZW
1444
1445 randomchar:
1446 default:
1447 token = CPP_OTHER;
1448 char1:
45b966db
ZW
1449 CPP_PUTC (pfile, c);
1450 return token;
1451 }
1452}
1453
1454/* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1455 Caller is expected to have checked no_macro_expand. */
1456static int
1457maybe_macroexpand (pfile, written)
1458 cpp_reader *pfile;
1459 long written;
1460{
1461 U_CHAR *macro = pfile->token_buffer + written;
1462 size_t len = CPP_WRITTEN (pfile) - written;
1463 HASHNODE *hp = _cpp_lookup (pfile, macro, len);
1464
a7abcbbf
ZW
1465 /* _cpp_lookup never returns null. */
1466 if (hp->type == T_VOID)
45b966db 1467 return 0;
d9e0bd53 1468 if (hp->disabled || hp->type == T_IDENTITY)
45b966db
ZW
1469 {
1470 if (pfile->output_escapes)
1471 {
1472 /* Insert a no-reexpand marker before IDENT. */
1473 CPP_RESERVE (pfile, 2);
1474 CPP_ADJUST_WRITTEN (pfile, 2);
1475 macro = pfile->token_buffer + written;
1476
1477 memmove (macro + 2, macro, len);
1478 macro[0] = '\r';
1479 macro[1] = '-';
1480 }
1481 return 0;
1482 }
ff2b53ef
ZW
1483 if (hp->type == T_EMPTY)
1484 {
1485 /* Special case optimization: macro expands to nothing. */
1486 CPP_SET_WRITTEN (pfile, written);
1487 CPP_PUTC_Q (pfile, ' ');
1488 return 1;
1489 }
45b966db
ZW
1490
1491 /* If macro wants an arglist, verify that a '(' follows. */
d9e0bd53 1492 if (hp->type == T_FMACRO)
45b966db
ZW
1493 {
1494 int macbuf_whitespace = 0;
1495 int c;
1496
1497 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1498 {
1499 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1500 for (;;)
1501 {
1502 _cpp_skip_hspace (pfile);
1503 c = PEEKC ();
1504 if (c == '\n')
1505 FORWARD(1);
1506 else
1507 break;
1508 }
1509 if (point != CPP_BUFFER (pfile)->cur)
1510 macbuf_whitespace = 1;
1511 if (c == '(')
1512 goto is_macro_call;
1513 else if (c != EOF)
1514 goto not_macro_call;
1515 cpp_pop_buffer (pfile);
1516 }
1517
1518 CPP_SET_MARK (pfile);
1519 for (;;)
1520 {
1521 _cpp_skip_hspace (pfile);
1522 c = PEEKC ();
1523 if (c == '\n')
1524 FORWARD(1);
1525 else
1526 break;
1527 }
1528 CPP_GOTO_MARK (pfile);
1529
1530 if (c != '(')
1531 {
1532 not_macro_call:
1533 if (macbuf_whitespace)
1534 CPP_PUTC (pfile, ' ');
1535 return 0;
1536 }
1537 }
1538
1539 is_macro_call:
1540 /* This is now known to be a macro call.
1541 Expand the macro, reading arguments as needed,
1542 and push the expansion on the input stack. */
1543 _cpp_macroexpand (pfile, hp);
1544 CPP_SET_WRITTEN (pfile, written);
1545 return 1;
1546}
1547
9e62c811
ZW
1548/* Complain about \v or \f in a preprocessing directive (constraint
1549 violation, C99 6.10 para 5). Caller has checked CPP_PEDANTIC. */
1550static void
1551pedantic_whitespace (pfile, p, len)
1552 cpp_reader *pfile;
1553 U_CHAR *p;
1554 unsigned int len;
1555{
1556 while (len)
1557 {
1558 if (*p == '\v')
1559 cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1560 else if (*p == '\f')
1561 cpp_pedwarn (pfile, "form feed in preprocessing directive");
1562 p++;
1563 len--;
1564 }
1565}
1566
1567
3a2b2c7a 1568enum cpp_ttype
45b966db
ZW
1569cpp_get_token (pfile)
1570 cpp_reader *pfile;
1571{
3a2b2c7a 1572 enum cpp_ttype token;
45b966db
ZW
1573 long written = CPP_WRITTEN (pfile);
1574
1575 get_next:
1576 token = _cpp_lex_token (pfile);
1577
1578 switch (token)
1579 {
1580 default:
ff2b53ef
ZW
1581 pfile->potential_control_macro = 0;
1582 pfile->only_seen_white = 0;
1583 return token;
1584
1585 case CPP_VSPACE:
1586 if (pfile->only_seen_white == 0)
1587 pfile->only_seen_white = 1;
1588 CPP_BUMP_LINE (pfile);
ff2b53ef
ZW
1589 return token;
1590
1591 case CPP_HSPACE:
1592 case CPP_COMMENT:
45b966db
ZW
1593 return token;
1594
1595 case CPP_DIRECTIVE:
ff2b53ef 1596 pfile->potential_control_macro = 0;
45b966db
ZW
1597 if (_cpp_handle_directive (pfile))
1598 return CPP_DIRECTIVE;
1599 pfile->only_seen_white = 0;
1600 CPP_PUTC (pfile, '#');
1601 return CPP_OTHER;
1602
1603 case CPP_MACRO:
ff2b53ef
ZW
1604 pfile->potential_control_macro = 0;
1605 pfile->only_seen_white = 0;
45b966db
ZW
1606 if (! pfile->no_macro_expand
1607 && maybe_macroexpand (pfile, written))
1608 goto get_next;
1609 return CPP_NAME;
1610
1611 case CPP_EOF:
f2d5f0cc
ZW
1612 if (CPP_BUFFER (pfile) == NULL)
1613 return CPP_EOF;
45b966db
ZW
1614 if (CPP_BUFFER (pfile)->manual_pop)
1615 /* If we've been reading from redirected input, the
1616 frontend will pop the buffer. */
1617 return CPP_EOF;
45b966db 1618
f2d5f0cc
ZW
1619 if (CPP_BUFFER (pfile)->seen_eof)
1620 {
45b966db
ZW
1621 cpp_pop_buffer (pfile);
1622 goto get_next;
1623 }
1624 else
1625 {
1626 _cpp_handle_eof (pfile);
1627 return CPP_POP;
1628 }
1629 }
1630}
1631
1632/* Like cpp_get_token, but skip spaces and comments. */
1633
3a2b2c7a 1634enum cpp_ttype
45b966db
ZW
1635cpp_get_non_space_token (pfile)
1636 cpp_reader *pfile;
1637{
1638 int old_written = CPP_WRITTEN (pfile);
1639 for (;;)
1640 {
3a2b2c7a 1641 enum cpp_ttype token = cpp_get_token (pfile);
ff2b53ef 1642 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
45b966db
ZW
1643 return token;
1644 CPP_SET_WRITTEN (pfile, old_written);
1645 }
1646}
1647
ff2b53ef 1648/* Like cpp_get_token, except that it does not execute directives,
9e62c811
ZW
1649 does not consume vertical space, discards horizontal space, and
1650 automatically pops off macro buffers. */
3a2b2c7a 1651enum cpp_ttype
9e62c811 1652_cpp_get_directive_token (pfile)
45b966db
ZW
1653 cpp_reader *pfile;
1654{
ff2b53ef 1655 long old_written;
3a2b2c7a 1656 enum cpp_ttype token;
45b966db 1657
ff2b53ef
ZW
1658 get_next:
1659 old_written = CPP_WRITTEN (pfile);
1660 token = _cpp_lex_token (pfile);
1661 switch (token)
45b966db 1662 {
ff2b53ef
ZW
1663 default:
1664 return token;
45b966db 1665
ff2b53ef
ZW
1666 case CPP_VSPACE:
1667 /* Put it back and return VSPACE. */
1668 FORWARD(-1);
1669 CPP_ADJUST_WRITTEN (pfile, -1);
1670 return CPP_VSPACE;
45b966db 1671
ff2b53ef
ZW
1672 case CPP_HSPACE:
1673 if (CPP_PEDANTIC (pfile))
9e62c811
ZW
1674 pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1675 CPP_WRITTEN (pfile) - old_written);
1676 CPP_SET_WRITTEN (pfile, old_written);
1677 goto get_next;
ff2b53ef 1678 return CPP_HSPACE;
45b966db 1679
ff2b53ef
ZW
1680 case CPP_DIRECTIVE:
1681 /* Don't execute the directive, but don't smash it to OTHER either. */
1682 CPP_PUTC (pfile, '#');
1683 return CPP_DIRECTIVE;
1684
1685 case CPP_MACRO:
1686 if (! pfile->no_macro_expand
1687 && maybe_macroexpand (pfile, old_written))
1688 goto get_next;
1689 return CPP_NAME;
45b966db 1690
ff2b53ef
ZW
1691 case CPP_EOF:
1692 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
45b966db 1693 {
ff2b53ef
ZW
1694 cpp_pop_buffer (pfile);
1695 goto get_next;
45b966db 1696 }
ff2b53ef
ZW
1697 else
1698 /* This can happen for files that don't end with a newline,
1699 and for cpp_define and friends. Pretend they do, so
1700 callers don't have to deal. A warning will be issued by
1701 someone else, if necessary. */
1702 return CPP_VSPACE;
1703 }
1704}
1705
45b966db
ZW
1706/* Determine the current line and column. Used only by read_and_prescan. */
1707static U_CHAR *
1708find_position (start, limit, linep)
1709 U_CHAR *start;
1710 U_CHAR *limit;
1711 unsigned long *linep;
1712{
1713 unsigned long line = *linep;
1714 U_CHAR *lbase = start;
1715 while (start < limit)
1716 {
1717 U_CHAR ch = *start++;
1718 if (ch == '\n' || ch == '\r')
1719 {
1720 line++;
1721 lbase = start;
1722 }
1723 }
1724 *linep = line;
1725 return lbase;
1726}
1727
2a87fbe8
ZW
1728/* The following table is used by _cpp_read_and_prescan. If we have
1729 designated initializers, it can be constant data; otherwise, it is
1730 set up at runtime by _cpp_init_input_buffer. */
46d07497
ZW
1731
1732#ifndef UCHAR_MAX
1733#define UCHAR_MAX 255 /* assume 8-bit bytes */
1734#endif
1735
/* The table contents are written once, further down, as a list of
   s(index, value) entries bracketed by CHARTAB and END.  The three
   helper macros are defined in one of two ways:
     - with designated initializers, the list becomes the initializer
       of a const array and init_chartab() expands to nothing;
     - otherwise the array starts out zeroed and the list becomes the
       body of a function init_chartab that stores each entry.  */
1736#if (GCC_VERSION >= 2007) || (__STDC_VERSION__ >= 199901L)
2a87fbe8
ZW
1737#define init_chartab() /* nothing */
1738#define CHARTAB static const unsigned char chartab[UCHAR_MAX + 1] = {
46d07497
ZW
1739#define END };
1740#define s(p, v) [p] = v,
1741#else
2a87fbe8
ZW
1742#define CHARTAB static unsigned char chartab[UCHAR_MAX + 1] = { 0 }; \
1743 static void init_chartab PARAMS ((void)) { \
1744 unsigned char *x = chartab;
46d07497
ZW
1745#define END }
1746#define s(p, v) x[p] = v;
1747#endif
1748
1749/* Table of characters that can't be handled in the inner loop.
2a87fbe8
ZW
1750 Also contains the mapping between trigraph third characters and their
1751 replacements. */
46d07497
ZW
/* Table values 1..3 select a case of the switch in
   _cpp_read_and_prescan.  Any larger value is the replacement
   character for a trigraph whose third character is the index.  */
1752#define SPECCASE_CR 1
1753#define SPECCASE_BACKSLASH 2
1754#define SPECCASE_QUESTION 3
1755
2a87fbe8 1756CHARTAB
46d07497
ZW
1757 s('\r', SPECCASE_CR)
1758 s('\\', SPECCASE_BACKSLASH)
1759 s('?', SPECCASE_QUESTION)
46d07497 1760
46d07497
ZW
/* Trigraphs: s(third character of "??x", character it stands for).  */
1761 s('=', '#') s(')', ']') s('!', '|')
1762 s('(', '[') s('\'', '^') s('>', '}')
1763 s('/', '\\') s('<', '{') s('-', '~')
1764END
1765
/* The construction macros are only needed for the table above.  */
1766#undef CHARTAB
46d07497
ZW
1767#undef END
1768#undef s
1769
2a87fbe8
ZW
/* NORMAL (c): character C needs no special treatment, i.e. its table
   entry is either 0 or a trigraph replacement (which only matters
   after "??").
   NONTRI (t): table VALUE T (not a character) is not a trigraph
   replacement, i.e. it is 0 or one of the SPECCASE_* codes.  */
1770#define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
1771#define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1772
45b966db
ZW
1773/* Read the entire contents of file DESC into buffer BUF. LEN is how
1774 much memory to allocate initially; more will be allocated if
1775 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1776 canonical form (\n). If enabled, convert and/or warn about
1777 trigraphs. Convert backslash-newline to a one-character escape
1778 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1779 token). If there is no newline at the end of the file, add one and
1780 warn. Returns -1 on failure, or the actual length of the data to
1781 be scanned.
1782
1783 This function does a lot of work, and can be a serious performance
1784 bottleneck. It has been tuned heavily; make sure you understand it
1785 before hacking. The common case - no trigraphs, Unix style line
1786 breaks, backslash-newline set off by whitespace, newline at EOF -
1787 has been optimized at the expense of the others. The performance
1788 penalty for DOS style line breaks (\r\n) is about 15%.
1789
1790 Warnings lose particularly heavily since we have to determine the
1791 line number, which involves scanning from the beginning of the file
1792 or from the last warning. The penalty for the absence of a newline
1793 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1794
1795 If your file has more than one kind of end-of-line marker, you
04e3ec78
NB
1796 will get messed-up line numbering.
1797
1798 So that the cases of the switch statement do not have to concern
1799 themselves with the complications of reading beyond the end of the
1800 buffer, the buffer is guaranteed to have at least 3 characters in
1801 it (or however many are left in the file, if less) on entry to the
1802 switch. This is enough to handle trigraphs and the "\\\n\r" and
1803 "\\\r\n" cases.
1804
1805 The end of the buffer is marked by a '\\', which, being a special
1806 character, guarantees we will exit the fast-scan loops and perform
1807 a refill. */
46d07497 1808
45b966db
ZW
1809long
1810_cpp_read_and_prescan (pfile, fp, desc, len)
1811 cpp_reader *pfile;
1812 cpp_buffer *fp;
1813 int desc;
1814 size_t len;
1815{
1816 U_CHAR *buf = (U_CHAR *) xmalloc (len);
1817 U_CHAR *ip, *op, *line_base;
1818 U_CHAR *ibase;
45b966db
ZW
1819 unsigned long line;
1820 unsigned int deferred_newlines;
45b966db 1821 size_t offset;
04e3ec78 1822 int count = 0;
45b966db
ZW
1823
1824 offset = 0;
04e3ec78 1825 deferred_newlines = 0;
45b966db
ZW
1826 op = buf;
1827 line_base = buf;
1828 line = 1;
04e3ec78
NB
1829 ibase = pfile->input_buffer + 3;
1830 ip = ibase;
1831 ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
45b966db
ZW
1832
1833 for (;;)
1834 {
04e3ec78
NB
1835 U_CHAR *near_buff_end;
1836
04e3ec78 1837 count = read (desc, ibase, pfile->input_buffer_len);
45b966db
ZW
1838 if (count < 0)
1839 goto error;
04e3ec78
NB
1840
1841 ibase[count] = '\\'; /* Marks end of buffer */
1842 if (count)
45b966db 1843 {
04e3ec78
NB
1844 near_buff_end = pfile->input_buffer + count;
1845 offset += count;
45b966db 1846 if (offset > len)
04e3ec78
NB
1847 {
1848 size_t delta_op;
1849 size_t delta_line_base;
1b955cba 1850 len = offset * 2;
04e3ec78
NB
1851 if (offset > len)
1852 /* len overflowed.
1853 This could happen if the file is larger than half the
1854 maximum address space of the machine. */
1855 goto too_big;
1856
1857 delta_op = op - buf;
1858 delta_line_base = line_base - buf;
1859 buf = (U_CHAR *) xrealloc (buf, len);
1860 op = buf + delta_op;
1861 line_base = buf + delta_line_base;
1862 }
1863 }
1864 else
1865 {
1866 if (ip == ibase)
1867 break;
1868 /* Allow normal processing of the (at most 2) remaining
1869 characters. The end-of-buffer marker is still present
1870 and prevents false matches within the switch. */
1871 near_buff_end = ibase - 1;
45b966db
ZW
1872 }
1873
1874 for (;;)
1875 {
04e3ec78 1876 unsigned int span;
45b966db 1877
04e3ec78 1878 /* Deal with \-newline, potentially in the middle of a token. */
45b966db
ZW
1879 if (deferred_newlines)
1880 {
2a87fbe8 1881 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
04e3ec78
NB
1882 {
1883 /* Previous was not white space. Skip to white
1884 space, if we can, before outputting the \r's */
1885 span = 0;
1886 while (ip[span] != ' '
1887 && ip[span] != '\t'
1888 && ip[span] != '\n'
2a87fbe8 1889 && NORMAL(ip[span]))
04e3ec78
NB
1890 span++;
1891 memcpy (op, ip, span);
1892 op += span;
1893 ip += span;
2a87fbe8 1894 if (! NORMAL(ip[0]))
04e3ec78
NB
1895 goto do_speccase;
1896 }
1897 while (deferred_newlines)
1898 deferred_newlines--, *op++ = '\r';
45b966db
ZW
1899 }
1900
1901 /* Copy as much as we can without special treatment. */
04e3ec78 1902 span = 0;
2a87fbe8 1903 while (NORMAL (ip[span])) span++;
45b966db
ZW
1904 memcpy (op, ip, span);
1905 op += span;
1906 ip += span;
1907
04e3ec78
NB
1908 do_speccase:
1909 if (ip > near_buff_end) /* Do we have enough chars? */
1910 break;
2a87fbe8 1911 switch (chartab[*ip++])
45b966db 1912 {
45b966db 1913 case SPECCASE_CR: /* \r */
04e3ec78 1914 if (ip[-2] != '\n')
45b966db 1915 {
04e3ec78
NB
1916 if (*ip == '\n')
1917 ip++;
1918 *op++ = '\n';
45b966db 1919 }
45b966db
ZW
1920 break;
1921
1922 case SPECCASE_BACKSLASH: /* \ */
04e3ec78 1923 if (*ip == '\n')
45b966db 1924 {
04e3ec78 1925 deferred_newlines++;
45b966db
ZW
1926 ip++;
1927 if (*ip == '\r') ip++;
45b966db
ZW
1928 }
1929 else if (*ip == '\r')
1930 {
04e3ec78 1931 deferred_newlines++;
45b966db
ZW
1932 ip++;
1933 if (*ip == '\n') ip++;
45b966db
ZW
1934 }
1935 else
1936 *op++ = '\\';
04e3ec78 1937 break;
45b966db
ZW
1938
1939 case SPECCASE_QUESTION: /* ? */
1940 {
1941 unsigned int d, t;
04e3ec78
NB
1942
1943 *op++ = '?'; /* Normal non-trigraph case */
1944 if (ip[0] != '?')
1945 break;
1946
45b966db 1947 d = ip[1];
2a87fbe8
ZW
1948 t = chartab[d];
1949 if (NONTRI (t))
04e3ec78 1950 break;
45b966db 1951
ae79697b 1952 if (CPP_OPTION (pfile, warn_trigraphs))
45b966db
ZW
1953 {
1954 unsigned long col;
1955 line_base = find_position (line_base, op, &line);
1956 col = op - line_base + 1;
ae79697b 1957 if (CPP_OPTION (pfile, trigraphs))
45b966db 1958 cpp_warning_with_line (pfile, line, col,
04e3ec78 1959 "trigraph ??%c converted to %c", d, t);
45b966db
ZW
1960 else
1961 cpp_warning_with_line (pfile, line, col,
04e3ec78 1962 "trigraph ??%c ignored", d);
45b966db 1963 }
04e3ec78
NB
1964
1965 ip += 2;
ae79697b 1966 if (CPP_OPTION (pfile, trigraphs))
45b966db 1967 {
04e3ec78 1968 op[-1] = t; /* Overwrite '?' */
45b966db 1969 if (t == '\\')
04e3ec78
NB
1970 {
1971 op--;
1972 *--ip = '\\';
1973 goto do_speccase; /* May need buffer refill */
1974 }
45b966db
ZW
1975 }
1976 else
1977 {
45b966db
ZW
1978 *op++ = '?';
1979 *op++ = d;
1980 }
1981 }
04e3ec78 1982 break;
45b966db
ZW
1983 }
1984 }
f6fab919
ZW
1985 /* Copy previous char plus unprocessed (at most 2) chars
1986 to beginning of buffer, refill it with another
1987 read(), and continue processing */
1988 memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
1989 ip -= count;
45b966db
ZW
1990 }
1991
1992 if (offset == 0)
1993 return 0;
1994
45b966db
ZW
1995 if (op[-1] != '\n')
1996 {
1997 unsigned long col;
1998 line_base = find_position (line_base, op, &line);
1999 col = op - line_base + 1;
f6fab919 2000 cpp_warning_with_line (pfile, line, col, "no newline at end of file");
45b966db
ZW
2001 if (offset + 1 > len)
2002 {
2003 len += 1;
2004 if (offset + 1 > len)
2005 goto too_big;
2006 buf = (U_CHAR *) xrealloc (buf, len);
2007 op = buf + offset;
2008 }
2009 *op++ = '\n';
2010 }
2011
2012 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
2013 return op - buf;
2014
2015 too_big:
f6fab919
ZW
2016 cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
2017 (unsigned long)offset);
45b966db
ZW
2018 free (buf);
2019 return -1;
2020
2021 error:
2022 cpp_error_from_errno (pfile, fp->ihash->name);
2023 free (buf);
2024 return -1;
2025}
2026
2a87fbe8
ZW
2027/* Allocate pfile->input_buffer, and initialize chartab[]
2028 if it hasn't happened already. */
46d07497 2029
45b966db
ZW
2030void
2031_cpp_init_input_buffer (pfile)
2032 cpp_reader *pfile;
2033{
2034 U_CHAR *tmp;
2035
2a87fbe8 2036 init_chartab ();
9e62c811 2037 init_token_list (pfile, &pfile->directbuf, 0);
04e3ec78 2038
45b966db
ZW
2039 /* Determine the appropriate size for the input buffer. Normal C
2040 source files are smaller than eight K. */
04e3ec78
NB
2041 /* 8Kbytes of buffer proper, 1 to detect running off the end without
2042 address arithmetic all the time, and 3 for pushback during buffer
2043 refill, in case there's a potential trigraph or end-of-line
2044 digraph at the end of a block. */
45b966db 2045
04e3ec78 2046 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
45b966db
ZW
2047 pfile->input_buffer = tmp;
2048 pfile->input_buffer_len = 8192;
2049}
c5a04734 2050
6d2c2047
ZW
2051/* Utility routine:
2052 Compares, in the manner of strcmp(3), the token beginning at TOKEN
2053 and extending for LEN characters to the NUL-terminated string
2054 STRING. Typical usage:
2055
2056 if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
2057 "inline"))
2058 { ... }
2059 */
2060
2061int
2062cpp_idcmp (token, len, string)
2063 const U_CHAR *token;
2064 size_t len;
2065 const char *string;
2066{
2067 size_t len2 = strlen (string);
2068 int r;
2069
2070 if ((r = memcmp (token, string, MIN (len, len2))))
2071 return r;
2072
2073 /* The longer of the two strings sorts after the shorter. */
2074 if (len == len2)
2075 return 0;
2076 else if (len < len2)
2077 return -1;
2078 else
2079 return 1;
2080}
2081
c5a04734
ZW
2082#if 0
2083
d6d5f795
NB
2084/* Lexing algorithm.
2085
2086 The original lexer in cpplib was made up of two passes: a first pass
2087 that replaced trigraphs and deleted escaped newlines, and a second
2088 pass that tokenized the result of the first pass. Tokenisation was
2089 performed by peeking at the next character in the input stream. For
6777db6d 2090 example, if the input stream contained "!=", the handler for the !
d6d5f795 2091 character would peek at the next character, and if it were a '='
6777db6d
NB
2092 would skip over it, and return a "!=" token, otherwise it would
2093 return just the "!" token.
d6d5f795
NB
2094
2095 To implement a single-pass lexer, this peeking ahead is unworkable.
2096 An arbitrary number of escaped newlines, and trigraphs (in particular
6777db6d
NB
2097 ??/ which translates to the escape \), could separate the '!' and '='
2098 in the input stream, yet the next token is still a "!=".
d6d5f795
NB
2099
2100 Suppose instead that we lex by one logical line at a time, producing
6777db6d
NB
2101 a token list or stack for each logical line, and when seeing the '!'
2102 push a CPP_NOT token on the list. Then if the '!' is part of a
2103 longer token ("!=") we know we must see the remainder of the token by
2104 the time we reach the end of the logical line. Thus we can have the
2105 '=' handler look at the previous token (at the end of the list / top
2106 of the stack) and see if it is a "!" token, and if so, instead of
2107 pushing a "=" token revise the existing token to be a "!=" token.
d6d5f795
NB
2108
2109 This works in the presence of escaped newlines, because the '\' would
2110 have been pushed on the top of the stack as a CPP_BACKSLASH. The
2111 newline ('\n' or '\r') handler looks at the token at the top of the
2112 stack to see if it is a CPP_BACKSLASH, and if so discards both.
2113 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
2114 the '=' handler would never see any intervening escaped newlines.
2115
2116 To make trigraphs work in this context, as in precedence trigraphs
2117 are highest and converted before anything else, the '?' handler does
2118 lookahead to see if it is a trigraph, and if so skips the trigraph
2119 and pushes the token it represents onto the top of the stack. This
2120 also works in the particular case of a CPP_BACKSLASH trigraph.
2121
2122 To the preprocessor, whitespace is only significant to the point of
2123 knowing whether whitespace precedes a particular token. For example,
2124 the '=' handler needs to know whether there was whitespace between it
6777db6d 2125 and a "!" token on the top of the stack, to make the token conversion
d6d5f795
NB
2126 decision correctly. So each token has a PREV_WHITESPACE flag to
2127 indicate this - the standard permits consecutive whitespace to be
2128 regarded as a single space. The compiler front ends are not
2129 interested in whitespace at all; they just require a token stream.
2130 Another place where whitespace is significant to the preprocessor is
2131 a #define statement - if there is whitespace between the macro name
2132 and an initial "(" token the macro is "object-like", otherwise it is
2133 a function-like macro that takes arguments.
2134
2135 However, all is not rosy. Parsing of identifiers, numbers, comments
2136 and strings becomes trickier because of the possibility of raw
2137 trigraphs and escaped newlines in the input stream.
2138
2139 The trigraphs are three consecutive characters beginning with two
c2e25d51
NB
2140 question marks. A question mark is not valid as part of a number or
2141 identifier, so parsing of a number or identifier terminates normally
2142 upon reaching it, returning to the mainloop which handles the
2143 trigraph just like it would in any other position. Similarly for the
2144 backslash of a backslash-newline combination. So we just need the
2145 escaped-newline dropper in the mainloop to check if the token on the
2146 top of the stack after dropping the escaped newline is a number or
2147 identifier, and if so to continue processing it as if nothing had
2148 happened.
d6d5f795
NB
2149
2150 For strings, we replace trigraphs whenever we reach a quote or
2151 newline, because there might be a backslash trigraph escaping them.
2152 We need to be careful that we start trigraph replacing from where we
2153 left off previously, because it is possible for a first scan to leave
2154 "fake" trigraphs that a second scan would pick up as real (e.g. the
c2e25d51 2155 sequence "????/\n=" would find a fake ??= trigraph after removing the
d6d5f795
NB
2156 escaped newline.)
2157
2158 For line comments, on reaching a newline we scan the previous
2159 character(s) to see if it is escaped, and continue if it is. Block
2160 comments ignore everything and just focus on finding the comment
2161 termination mark. The only difficult thing, and it is surprisingly
2162 tricky, is checking if an asterisk precedes the final slash since
2163 they could be separated by escaped newlines. If the preprocessor is
2164 invoked with the output comments option, we don't bother removing
2165 escaped newlines and replacing trigraphs for output.
2166
2167 Finally, numbers can begin with a period, which is pushed initially
2168 as a CPP_DOT token in its own right. The digit handler checks if the
2169 previous token was a CPP_DOT not separated by whitespace, and if so
2170 pops it off the stack and pushes a period into the number's buffer
2171 before calling the number parser.
2172
2173*/
2174
c5a04734
ZW
/* Forward declarations for the line tokenizer and its helpers.  PARAMS
   wraps each parameter list so the declarations also work with
   compilers that lack prototypes.  */
static void expand_comment_space PARAMS ((cpp_toklist *));
void init_trigraph_map PARAMS ((void));
static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
                                                unsigned char *));
static const unsigned char *backslash_start PARAMS ((cpp_reader *,
                                                      const unsigned char *));
static int skip_block_comment PARAMS ((cpp_reader *));
static int skip_line_comment PARAMS ((cpp_reader *));
static void skip_whitespace PARAMS ((cpp_reader *, int));
static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
                                  unsigned int));
static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
static void copy_comment PARAMS ((cpp_toklist *, const unsigned char *,
                                  unsigned int, unsigned int, unsigned int));
void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));

static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));

/* Token spelling routines.  Each writes the spelling of a token into
   the buffer given as first argument and returns the number of bytes
   written.  */
unsigned int spell_string PARAMS ((unsigned char *, cpp_toklist *,
                                   cpp_token *token));
unsigned int spell_comment PARAMS ((unsigned char *, cpp_toklist *,
                                    cpp_token *token));
unsigned int spell_name PARAMS ((unsigned char *, cpp_toklist *,
                                 cpp_token *token));

/* Pointer to a spelling routine with the signature shared by the
   spell_* functions above.  */
typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
                                          cpp_token *));
2204
/* Macros on a cpp_name.  INIT_NAME starts an empty name whose text
   will be appended at the current end of LIST's name buffer.  */
#define INIT_NAME(list, name) \
  do {(name).len = 0; (name).offset = (list)->name_used;} while (0)

/* True if the first token of LIST is a '#', i.e. the line is a
   directive.  */
#define IS_DIRECTIVE(list) ((list)->tokens[0].type == CPP_HASH)

/* Offset of CUR from the start of the current line.  Relies on a
   variable named `buffer' being in scope at the point of use.  */
#define COLUMN(cur) ((cur) - buffer->line_base)

/* Maybe put these in the ISTABLE eventually.  */
#define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
#define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')

/* Handle LF, CR, CR-LF and LF-CR style newlines.  C is the newline
   character just consumed and CUR points just past it.  The expression
   '\r' + '\n' - (c) yields the *other* newline character, so a
   following partner character is swallowed as part of the same line
   break.  CUR is only dereferenced when it is below LIMIT.  Relies on
   a variable named `pfile' being in scope at the point of use.  */
#define handle_newline(cur, limit, c) \
  do {\
  if ((cur) < (limit) && *(cur) == '\r' + '\n' - (c)) \
    (cur)++; \
  CPP_BUMP_LINE_CUR (pfile, (cur)); \
  } while (0)

/* True if the token being built was not preceded by whitespace.
   Relies on a variable named `cur_token' being in scope.  */
#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))

/* Type of the token immediately before the one being built.  */
#define PREV_TOKEN_TYPE (cur_token[-1].type)
2227
/* Spelling classes stored in the `type' field of token_spellings
   entries.  How each class is turned into text is decided by the
   spelling code, which is not part of this table.  */
#define SPELL_TEXT     0
#define SPELL_HANDLER  1
#define SPELL_CHAR     2
#define SPELL_NONE     3
#define SPELL_EOL      4

/* Each of these expands one TTYPE_TABLE entry into an initializer for
   a struct token_spelling, tagging it with the matching SPELL_* class.
   The first argument (the token enumerator) is not used here.  */
#define T(e, s) {SPELL_TEXT, s},
#define H(e, s) {SPELL_HANDLER, s},
#define C(e, s) {SPELL_CHAR, s},
#define N(e, s) {SPELL_NONE, s},
#define E(e, s) {SPELL_EOL, s},

/* Table built from TTYPE_TABLE (defined elsewhere; presumably one entry
   per token type -- confirm against its definition), followed by a
   zeroed terminating entry.  */
static const struct token_spelling
{
  unsigned char type;   /* One of the SPELL_* values above.  */
  PTR speller;          /* Second macro argument; meaning depends on type.  */
} token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };

/* The helper macros are only needed to build the table.  */
#undef T
#undef H
#undef C
#undef N
#undef E
2251
/* Source spellings of the digraph forms of '#', '##', '[', ']', '{'
   and '}'.  NOTE(review): the order presumably has to match the way the
   spelling code indexes this array -- confirm against its user.  */
static const unsigned char *digraph_spellings [] = {"%:", "%:%:", "<:",
                                                    ":>", "<%", "%>"};
2254
2255static void
2256expand_comment_space (list)
2257 cpp_toklist *list;
2258{
2259 if (list->comments_cap == 0)
2260 {
2261 list->comments_cap = 10;
2262 list->comments = (cpp_token *)
2263 xmalloc (list->comments_cap * sizeof (cpp_token));
2264 }
2265 else
2266 {
2267 list->comments_cap *= 2;
2268 list->comments = (cpp_token *)
2269 xrealloc (list->comments, list->comments_cap);
2270 }
2271}
2272
2273void
2274cpp_free_token_list (list)
2275 cpp_toklist *list;
2276{
2277 if (list->comments)
2278 free (list->comments);
cfd5b8b8 2279 free (list->tokens - 1); /* Backup over dummy token. */
c5a04734
ZW
2280 free (list->namebuf);
2281 free (list);
2282}
2283
/* Indexed by the third character of a "??x" sequence; holds the
   character the trigraph stands for, or zero if "??x" is not a
   trigraph.  */
static unsigned char trigraph_map[256];

/* Fill in trigraph_map with the nine trigraph replacements.  Entries
   for all other characters are left untouched.  */
void
init_trigraph_map ()
{
  static const unsigned char replacements[][2] = {
    { '=',  '#'  },
    { '(',  '['  },
    { ')',  ']'  },
    { '/',  '\\' },
    { '\'', '^'  },
    { '<',  '{'  },
    { '>',  '}'  },
    { '!',  '|'  },
    { '-',  '~'  }
  };
  unsigned int i;

  for (i = 0; i < sizeof replacements / sizeof replacements[0]; i++)
    trigraph_map[replacements[i][0]] = replacements[i][1];
}
2299
2300/* Call when a trigraph is encountered. It warns if necessary, and
2301 returns true if the trigraph should be honoured. END is the third
2302 character of a trigraph in the input stream. */
2303static int
2304trigraph_ok (pfile, end)
2305 cpp_reader *pfile;
2306 const unsigned char *end;
2307{
2308 int accept = CPP_OPTION (pfile, trigraphs);
2309
2310 if (CPP_OPTION (pfile, warn_trigraphs))
2311 {
2312 unsigned int col = end - 1 - pfile->buffer->line_base;
2313 if (accept)
2314 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2315 "trigraph ??%c converted to %c",
2316 (int) *end, (int) trigraph_map[*end]);
2317 else
2318 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2319 "trigraph ??%c ignored", (int) *end);
2320 }
2321 return accept;
2322}
2323
2324/* Scan a string for trigraphs, warning or replacing them inline as
2325 appropriate. When parsing a string, we must call this routine
2326 before processing a newline character (if trigraphs are enabled),
2327 since the newline might be escaped by a preceding backslash
2328 trigraph sequence. Returns a pointer to the end of the name after
2329 replacement. */
2330
2331static unsigned char*
2332trigraph_replace (pfile, src, limit)
2333 cpp_reader *pfile;
2334 unsigned char *src;
2335 unsigned char* limit;
2336{
2337 unsigned char *dest;
2338
2339 /* Starting with src[1], find two consecutive '?'. The case of no
2340 trigraphs is streamlined. */
2341
2342 for (; src + 1 < limit; src += 2)
2343 {
2344 if (src[0] != '?')
2345 continue;
2346
2347 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
2348 if (src[-1] == '?')
2349 src--;
2350 else if (src + 2 == limit || src[1] != '?')
2351 continue;
2352
2353 /* Check if it really is a trigraph. */
2354 if (trigraph_map[src[2]] == 0)
2355 continue;
2356
2357 dest = src;
2358 goto trigraph_found;
2359 }
2360 return limit;
2361
2362 /* Now we have a trigraph, we need to scan the remaining buffer, and
2363 copy-shifting its contents left if replacement is enabled. */
2364 for (; src + 2 < limit; dest++, src++)
2365 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2366 {
2367 trigraph_found:
2368 src += 2;
2369 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2370 *dest = trigraph_map[*src];
2371 }
2372
2373 /* Copy remaining (at most 2) characters. */
2374 while (src < limit)
2375 *dest++ = *src++;
2376 return dest;
2377}
2378
2379/* If CUR is a backslash or the end of a trigraphed backslash, return
2380 a pointer to its beginning, otherwise NULL. We don't read beyond
2381 the buffer start, because there is the start of the comment in the
2382 buffer. */
2383static const unsigned char *
2384backslash_start (pfile, cur)
2385 cpp_reader *pfile;
2386 const unsigned char *cur;
2387{
2388 if (cur[0] == '\\')
2389 return cur;
2390 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2391 && trigraph_ok (pfile, cur))
2392 return cur - 2;
2393 return 0;
2394}
2395
/* Skip a C-style block comment.  This is probably the trickiest
   handler.  We find the end of the comment by seeing if an asterisk
   is before every '/' we encounter.  The nasty complication is that a
   previous asterisk may be separated by one or more escaped newlines.

   On entry pfile->buffer->cur points just past the opening comment
   marker; on exit it points just past the closing '/', or at the end
   of the buffer if the comment was never closed.  Returns non-zero if
   comment terminated by EOF, zero otherwise.  */
static int
skip_block_comment (pfile)
     cpp_reader *pfile;
{
  cpp_buffer *buffer = pfile->buffer;
  /* When non-null, the position that follows a '*' plus one or more
     escaped newlines; a '/' found exactly there ends the comment.  */
  const unsigned char *char_after_star = 0;
  register const unsigned char *cur = buffer->cur;
  int seen_eof = 0;

  /* Inner loop would think the comment has ended if the first comment
     character is a '/'.  Avoid this and keep the inner loop clean by
     skipping such a character.  */
  if (cur < buffer->rlimit && cur[0] == '/')
    cur++;

  for (; cur < buffer->rlimit; )
    {
      unsigned char c = *cur++;

      /* People like decorating comments with '*', so check for
         '/' instead for efficiency.  */
      if (c == '/')
        {
          /* cur[-2] is the character before this '/'.  */
          if (cur[-2] == '*' || cur - 1 == char_after_star)
            goto out;

          /* Warn about potential nested comments, but not when
             the final character inside the comment is a '/'.
             Don't bother to get it right across escaped newlines.  */
          if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
              && cur[0] == '*' && cur[1] != '/')
            {
              /* Publish the position before warning.  */
              buffer->cur = cur;
              cpp_warning (pfile, "'/*' within comment");
            }
        }
      else if (IS_NEWLINE(c))
        {
          /* Must look at the character before the newline now, since
             handle_newline may advance cur.  */
          const unsigned char* bslash = backslash_start (pfile, cur - 2);

          handle_newline (cur, buffer->rlimit, c);
          /* Work correctly if there is an asterisk before an
             arbitrarily long sequence of escaped newlines.  */
          if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
            char_after_star = cur;
          else
            char_after_star = 0;
        }
    }
  /* Ran off the end of the buffer without finding the terminator.  */
  seen_eof = 1;

 out:
  buffer->cur = cur;
  return seen_eof;
}
2456
2457/* Skip a C++ or Chill line comment. Handles escaped newlines.
2458 Returns non-zero if a multiline comment. */
2459static int
2460skip_line_comment (pfile)
2461 cpp_reader *pfile;
2462{
2463 cpp_buffer *buffer = pfile->buffer;
2464 register const unsigned char *cur = buffer->cur;
2465 int multiline = 0;
2466
2467 for (; cur < buffer->rlimit; )
2468 {
2469 unsigned char c = *cur++;
2470
2471 if (IS_NEWLINE (c))
2472 {
2473 /* Check for a (trigaph?) backslash escaping the newline. */
2474 if (!backslash_start (pfile, cur - 2))
2475 goto out;
2476 multiline = 1;
2477 handle_newline (cur, buffer->rlimit, c);
2478 }
2479 }
2480 cur++;
2481
2482 out:
2483 buffer->cur = cur - 1; /* Leave newline for caller. */
2484 return multiline;
2485}
2486
2487/* Skips whitespace, stopping at next non-whitespace character. */
2488static void
2489skip_whitespace (pfile, in_directive)
2490 cpp_reader *pfile;
2491 int in_directive;
2492{
2493 cpp_buffer *buffer = pfile->buffer;
2494 register const unsigned char *cur = buffer->cur;
2495 unsigned short null_count = 0;
2496
2497 for (; cur < buffer->rlimit; )
2498 {
2499 unsigned char c = *cur++;
2500
2501 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
2502 continue;
2503 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
2504 goto out;
2505 if (c == '\0')
2506 null_count++;
2507 /* Mut be '\f' or '\v' */
2508 else if (in_directive && CPP_PEDANTIC (pfile))
2509 cpp_pedwarn (pfile, "%s in preprocessing directive",
2510 c == '\f' ? "formfeed" : "vertical tab");
2511 }
2512 cur++;
2513
2514 out:
2515 buffer->cur = cur - 1;
2516 if (null_count)
2517 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2518 : "embedded null character ignored");
2519}
2520
2521/* Parse (append) an identifier. */
2522static void
2523parse_name (pfile, list, name)
2524 cpp_reader *pfile;
2525 cpp_toklist *list;
2526 cpp_name *name;
2527{
2528 const unsigned char *name_limit;
2529 unsigned char *namebuf;
2530 cpp_buffer *buffer = pfile->buffer;
2531 register const unsigned char *cur = buffer->cur;
2532
2533 expanded:
2534 name_limit = list->namebuf + list->name_cap;
2535 namebuf = list->namebuf + list->name_used;
2536
2537 for (; cur < buffer->rlimit && namebuf < name_limit; )
2538 {
2539 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2540
2541 if (! is_idchar(c))
2542 goto out;
2543 namebuf++;
2544 cur++;
2545 if (c == '$' && CPP_PEDANTIC (pfile))
2546 {
2547 buffer->cur = cur;
2548 cpp_pedwarn (pfile, "'$' character in identifier");
2549 }
2550 }
2551
2552 /* Run out of name space? */
2553 if (cur < buffer->rlimit)
2554 {
2555 list->name_used = namebuf - list->namebuf;
2556 auto_expand_name_space (list);
2557 goto expanded;
2558 }
2559
2560 out:
2561 buffer->cur = cur;
2562 name->len = namebuf - (list->namebuf + name->offset);
2563 list->name_used = namebuf - list->namebuf;
2564}
2565
2566/* Parse (append) a number. */
2567
2568#define VALID_SIGN(c, prevc) \
2569 (((c) == '+' || (c) == '-') && \
2570 ((prevc) == 'e' || (prevc) == 'E' \
2571 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2572
2573static void
2574parse_number (pfile, list, name)
2575 cpp_reader *pfile;
2576 cpp_toklist *list;
2577 cpp_name *name;
2578{
2579 const unsigned char *name_limit;
2580 unsigned char *namebuf;
2581 cpp_buffer *buffer = pfile->buffer;
2582 register const unsigned char *cur = buffer->cur;
2583
2584 expanded:
2585 name_limit = list->namebuf + list->name_cap;
2586 namebuf = list->namebuf + list->name_used;
2587
2588 for (; cur < buffer->rlimit && namebuf < name_limit; )
2589 {
2590 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2591
2592 /* Perhaps we should accept '$' here if we accept it for
2593 identifiers. We know namebuf[-1] is safe, because for c to
2594 be a sign we must have pushed at least one character. */
2595 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2596 goto out;
2597
2598 namebuf++;
2599 cur++;
2600 }
2601
2602 /* Run out of name space? */
2603 if (cur < buffer->rlimit)
2604 {
2605 list->name_used = namebuf - list->namebuf;
2606 auto_expand_name_space (list);
2607 goto expanded;
2608 }
2609
2610 out:
2611 buffer->cur = cur;
2612 name->len = namebuf - (list->namebuf + name->offset);
2613 list->name_used = namebuf - list->namebuf;
2614}
2615
2616/* Places a string terminated by an unescaped TERMINATOR into a
2617 cpp_name, which should be expandable and thus at the top of the
2618 list's stack. Handles embedded trigraphs, if necessary, and
2619 escaped newlines.
2620
2621 Can be used for character constants (terminator = '\''), string
2622 constants ('"'), angled headers ('>') and assertions (')'). */
2623
2624static void
2625parse_string (pfile, list, name, terminator)
2626 cpp_reader *pfile;
2627 cpp_toklist *list;
2628 cpp_name *name;
2629 unsigned int terminator;
2630{
2631 cpp_buffer *buffer = pfile->buffer;
2632 register const unsigned char *cur = buffer->cur;
2633 const unsigned char *name_limit;
2634 unsigned char *namebuf;
2635 unsigned int null_count = 0;
2636 int trigraphed_len = 0;
2637
2638 expanded:
2639 name_limit = list->namebuf + list->name_cap;
2640 namebuf = list->namebuf + list->name_used;
2641
2642 for (; cur < buffer->rlimit && namebuf < name_limit; )
2643 {
2644 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
2645
2646 if (c == '\0')
2647 null_count++;
2648 else if (c == terminator || IS_NEWLINE (c))
2649 {
2650 unsigned char* name_start = list->namebuf + name->offset;
2651
2652 /* Needed for trigraph_replace and multiline string warning. */
2653 buffer->cur = cur;
2654
2655 /* Scan for trigraphs before checking if backslash-escaped. */
2656 if (CPP_OPTION (pfile, trigraphs)
2657 || CPP_OPTION (pfile, warn_trigraphs))
2658 {
2659 namebuf = trigraph_replace (pfile, name_start + trigraphed_len,
2660 namebuf);
2661 trigraphed_len = namebuf - 2 - (name_start + trigraphed_len);
2662 if (trigraphed_len < 0)
2663 trigraphed_len = 0;
2664 }
2665
2666 namebuf--; /* Drop the newline / terminator from the name. */
2667 if (IS_NEWLINE (c))
2668 {
2669 /* Drop a backslash newline, and continue. */
2670 if (namebuf[-1] == '\\')
2671 {
2672 handle_newline (cur, buffer->rlimit, c);
2673 namebuf--;
2674 continue;
2675 }
2676
2677 cur--;
2678
2679 /* In Fortran and assembly language, silently terminate
2680 strings of either variety at end of line. This is a
2681 kludge around not knowing where comments are in these
2682 languages. */
2683 if (CPP_OPTION (pfile, lang_fortran)
2684 || CPP_OPTION (pfile, lang_asm))
2685 goto out;
2686
2687 /* Character constants, headers and asserts may not
2688 extend over multiple lines. In Standard C, neither
2689 may strings. We accept multiline strings as an
2690 extension, but not in directives. */
2691 if (terminator != '"' || IS_DIRECTIVE (list))
2692 goto unterminated;
2693
2694 cur++; /* Move forwards again. */
2695
2696 if (pfile->multiline_string_line == 0)
2697 {
2698 pfile->multiline_string_line = list->line;
2699 if (CPP_PEDANTIC (pfile))
2700 cpp_pedwarn (pfile, "multi-line string constant");
2701 }
2702
2703 *namebuf++ = '\n';
2704 handle_newline (cur, buffer->rlimit, c);
2705 }
2706 else
2707 {
2708 unsigned char *temp;
2709
2710 /* An odd number of consecutive backslashes represents
2711 an escaped terminator. */
2712 temp = namebuf - 1;
2713 while (temp >= name_start && *temp == '\\')
2714 temp--;
2715
2716 if ((namebuf - temp) & 1)
2717 goto out;
2718 namebuf++;
2719 }
2720 }
2721 }
2722
2723 /* Run out of name space? */
2724 if (cur < buffer->rlimit)
2725 {
2726 list->name_used = namebuf - list->namebuf;
2727 auto_expand_name_space (list);
2728 goto expanded;
2729 }
2730
2731 /* We may not have trigraph-replaced the input for this code path,
2732 but as the input is in error by being unterminated we don't
2733 bother. Prevent warnings about no newlines at EOF. */
2734 if (IS_NEWLINE(cur[-1]))
2735 cur--;
2736
2737 unterminated:
2738 cpp_error (pfile, "missing terminating %c character", (int) terminator);
2739
2740 if (terminator == '\"' && pfile->multiline_string_line != list->line
2741 && pfile->multiline_string_line != 0)
2742 {
2743 cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2744 "possible start of unterminated string literal");
2745 pfile->multiline_string_line = 0;
2746 }
2747
2748 out:
2749 buffer->cur = cur;
2750 name->len = namebuf - (list->namebuf + name->offset);
2751 list->name_used = namebuf - list->namebuf;
2752
2753 if (null_count > 0)
2754 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2755 : "null character preserved"));
2756}
2757
2758/* The character C helps us distinguish comment types: '*' = C style,
2759 '-' = Chill-style and '/' = C++ style. For code simplicity, the
2760 stored comment includes any C-style comment terminator. */
2761static void
2762copy_comment (list, from, len, tok_no, type)
2763 cpp_toklist *list;
2764 const unsigned char *from;
2765 unsigned int len;
2766 unsigned int tok_no;
2767 unsigned int type;
2768{
2769 cpp_token *comment;
2770
2771 if (list->comments_used == list->comments_cap)
2772 expand_comment_space (list);
2773
2774 if (list->name_used + len > list->name_cap)
2775 expand_name_space (list, len);
2776
2777 comment = &list->comments[list->comments_used++];
2778 comment->type = type;
2779 comment->aux = tok_no;
2780 comment->val.name.len = len;
2781 comment->val.name.offset = list->name_used;
2782
2783 memcpy (list->namebuf + list->name_used, from, len);
2784 list->name_used += len;
2785}
2786
/*
 *  The tokenizer's main loop.  Returns a token list, representing a
 *  logical line in the input file, terminated with a CPP_VSPACE
 *  token.  On EOF, a token list containing the single CPP_EOF token
 *  is returned.
 *
 *  Implementation relies almost entirely on lookback, rather than
 *  looking forwards.  This means that tokenization requires just
 *  a single pass of the file, even in the presence of trigraphs and
 *  escaped newlines, providing significant performance benefits.
 *  Trigraph overhead is negligible if they are disabled, and low
 *  even when enabled.
 */

/* Helpers for the main loop; all of them rely on a variable named
   `cur_token' pointing at the token currently being built.  */

/* Give the current token type TTYPE and move on to the next slot.  */
#define PUSH_TOKEN(ttype) cur_token++->type = ttype
/* Change the type of the previous token to TTYPE.  */
#define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
/* Step back to the previous token and give it type TTYPE.  */
#define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
/* As BACKUP_TOKEN, and also mark the token as spelled with a digraph.  */
#define BACKUP_DIGRAPH(ttype) do { \
  BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
2806
/* Lex one logical line from pfile->buffer, appending tokens to LIST
   starting at index list->tokens_used.  Multi-character operators are
   recognised by lookback: each character either pushes a new token or
   revises the token(s) pushed just before it.  Lexing stops after the
   CPP_VSPACE token pushed for an unescaped newline, or at the end of
   the buffer.  On return buffer->cur and list->tokens_used are
   updated.  An error is reported here if the line is a directive for
   which no handler was found.  */
void
_cpp_lex_line (pfile, list)
     cpp_reader *pfile;
     cpp_toklist *list;
{
  cpp_token *cur_token, *token_limit;
  cpp_buffer *buffer = pfile->buffer;
  register const unsigned char *cur = buffer->cur;
  /* Flags to give the next token; holds PREV_WHITESPACE if whitespace
     (or a comment, unless traditional) has just been skipped.  */
  unsigned char flags = 0;

  /* Re-entered after the token array has been enlarged.  */
 expanded:
  token_limit = list->tokens + list->tokens_cap;
  cur_token = list->tokens + list->tokens_used;

  for (; cur < buffer->rlimit && cur_token < token_limit;)
    {
      unsigned char c = *cur++;

      /* Optimize whitespace skipping, in particular the case of a
         single whitespace character, as every other token is probably
         whitespace.  (' '  '\t'  '\v'  '\f'  '\0').  */
      if (is_hspace ((unsigned int) c))
        {
          if (c == '\0' || (cur < buffer->rlimit && is_hspace (*cur)))
            {
              buffer->cur = cur - (c == '\0');  /* Get the null warning.  */
              skip_whitespace (pfile, IS_DIRECTIVE (list));
              cur = buffer->cur;
            }
          flags = PREV_WHITESPACE;
          if (cur == buffer->rlimit)
            break;
          c = *cur++;
        }

      /* Initialize current token.  Its type is set in the switch.  */
      cur_token->col = COLUMN (cur);
      cur_token->flags = flags;
      flags = 0;

      switch (c)
        {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
          /* Prepend an immediately previous CPP_DOT token.  */
          if (PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ())
            {
              cur_token--;
              if (list->name_cap == list->name_used)
                auto_expand_name_space (list);

              cur_token->val.name.len = 1;
              cur_token->val.name.offset = list->name_used;
              list->namebuf[list->name_used++] = '.';
            }
          else
            INIT_NAME (list, cur_token->val.name);
          cur--;                /* Backup character.  */

          /* Also the re-entry point for a number that was interrupted
             by an escaped newline.  */
        continue_number:
          buffer->cur = cur;
          parse_number (pfile, list, &cur_token->val.name);
          cur = buffer->cur;

          PUSH_TOKEN (CPP_NUMBER); /* Number not yet interpreted.  */
          break;

        letter:
        case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
        case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
        case 's': case 't': case 'u': case 'v': case 'w': case 'x':
        case 'y': case 'z':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
        case 'Y': case 'Z':
          INIT_NAME (list, cur_token->val.name);
          cur--;                     /* Backup character.  */
          cur_token->type = CPP_NAME; /* Identifier, macro etc.  */

          /* Also the re-entry point for a name that was interrupted
             by an escaped newline.  */
        continue_name:
          buffer->cur = cur;
          parse_name (pfile, list, &cur_token->val.name);
          cur = buffer->cur;

          /* Find handler for newly created / extended directive.  */
          if (IS_DIRECTIVE (list) && cur_token == &list->tokens[1])
            _cpp_check_directive (list, cur_token);
          cur_token++;
          break;

        case '\'':
          /* Fall through.  */
        case '\"':
          cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
          /* Do we have a wide string?  */
          if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
              && cur_token[-1].val.name.len == 1
              && TOK_NAME (list, cur_token - 1)[0] == 'L'
              && !CPP_TRADITIONAL (pfile))
            {
              /* No need for 'L' any more.  */
              list->name_used--;
              (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
            }

        do_parse_string:
          /* Here c is one of ' " > or ).  */
          INIT_NAME (list, cur_token->val.name);
          buffer->cur = cur;
          parse_string (pfile, list, &cur_token->val.name, c);
          cur = buffer->cur;
          cur_token++;
          break;

        case '/':
          cur_token->type = CPP_DIV;
          if (IMMED_TOKEN ())
            {
              if (PREV_TOKEN_TYPE == CPP_DIV)
                {
                  /* We silently allow C++ comments in system headers,
                     irrespective of conformance mode, because lots of
                     broken systems do that and trying to clean it up
                     in fixincludes is a nightmare.  */
                  if (buffer->system_header_p)
                    goto do_line_comment;
                  else if (CPP_OPTION (pfile, cplusplus_comments))
                    {
                      if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
                          && ! buffer->warned_cplusplus_comments)
                        {
                          buffer->cur = cur;
                          cpp_pedwarn (pfile,
                             "C++ style comments are not allowed in ISO C89");
                          cpp_pedwarn (pfile,
                          "(this will be reported only once per input file)");
                          buffer->warned_cplusplus_comments = 1;
                        }
                      /* Also reached from the '-' case for Chill
                         comments, in which case c is '-'.  */
                    do_line_comment:
                      buffer->cur = cur;
                      /* The two comment-start characters are not
                         adjacent in the buffer.  */
                      if (cur[-2] != c)
                        cpp_warning (pfile,
                                     "comment start split across lines");
                      if (skip_line_comment (pfile))
                        cpp_error_with_line (pfile, list->line,
                                             cur_token[-1].col,
                                             "multi-line comment");
                      if (!CPP_OPTION (pfile, discard_comments))
                        copy_comment (list, cur, buffer->cur - cur,
                                      cur_token - 1 - list->tokens, c == '/'
                                      ? CPP_CPP_COMMENT: CPP_CHILL_COMMENT);
                      cur = buffer->cur;

                      /* Back-up to first '-' or '/'.  Together with
                         the increment below this discards the token
                         pushed for the first comment character.  */
                      cur_token -= 2;
                      if (!CPP_OPTION (pfile, traditional))
                        flags = PREV_WHITESPACE;
                    }
                }
            }
          cur_token++;
          break;

        case '*':
          cur_token->type = CPP_MULT;
          if (IMMED_TOKEN ())
            {
              if (PREV_TOKEN_TYPE == CPP_DIV)
                {
                  buffer->cur = cur;
                  if (cur[-2] != '/')
                    cpp_warning (pfile,
                                 "comment start '/*' split across lines");
                  if (skip_block_comment (pfile))
                    cpp_error_with_line (pfile, list->line, cur_token[-1].col,
                                         "unterminated comment");
                  else if (buffer->cur[-2] != '*')
                    cpp_warning (pfile,
                                 "comment end '*/' split across lines");
                  if (!CPP_OPTION (pfile, discard_comments))
                    copy_comment (list, cur, buffer->cur - cur,
                                 cur_token - 1 - list->tokens, CPP_C_COMMENT);
                  cur = buffer->cur;

                  /* Together with the increment below this discards
                     the CPP_DIV token pushed for the '/'.  */
                  cur_token -= 2;
                  if (!CPP_OPTION (pfile, traditional))
                    flags = PREV_WHITESPACE;
                }
              else if (CPP_OPTION (pfile, cplusplus))
                {
                  /* In C++, there are .* and ->* operators.  */
                  if (PREV_TOKEN_TYPE == CPP_DEREF)
                    BACKUP_TOKEN (CPP_DEREF_STAR);
                  else if (PREV_TOKEN_TYPE == CPP_DOT)
                    BACKUP_TOKEN (CPP_DOT_STAR);
                }
            }
          cur_token++;
          break;

        case '\n':
        case '\r':
          handle_newline (cur, buffer->rlimit, c);
          if (PREV_TOKEN_TYPE != CPP_BACKSLASH || !IMMED_TOKEN ())
            {
              if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
                {
                  buffer->cur = cur;
                  cpp_warning (pfile,
                               "backslash and newline separated by space");
                }
              PUSH_TOKEN (CPP_VSPACE);
              goto out;
            }
          /* Remove the escaped newline.  Then continue to process
             any interrupted name or number.  */
          cur_token--;
          /* cur_token now points at the backslash token, so this tests
             whether the backslash itself followed whitespace.  */
          if (IMMED_TOKEN ())
            {
              cur_token--;
              if (cur_token->type == CPP_NAME)
                goto continue_name;
              else if (cur_token->type == CPP_NUMBER)
                goto continue_number;
              cur_token++;
            }
          break;

        case '-':
          if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
            {
              if (CPP_OPTION (pfile, chill))
                goto do_line_comment;
              REVISE_TOKEN (CPP_MINUS_MINUS);
            }
          else
            PUSH_TOKEN (CPP_MINUS);
          break;

          /* The digraph flag checking ensures that ## and %:%:
             are interpreted as CPP_PASTE, but #%: and %:# are not.  */
        make_hash:
        case '#':
          if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
              && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
            REVISE_TOKEN (CPP_PASTE);
          else
            PUSH_TOKEN (CPP_HASH);
          break;

        case ':':
          cur_token->type = CPP_COLON;
          if (IMMED_TOKEN ())
            {
              if (PREV_TOKEN_TYPE == CPP_COLON
                  && CPP_OPTION (pfile, cplusplus))
                BACKUP_TOKEN (CPP_SCOPE);
              /* Digraph: "<:" is a '['  */
              else if (PREV_TOKEN_TYPE == CPP_LESS)
                BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
              /* Digraph: "%:" is a '#'  */
              else if (PREV_TOKEN_TYPE == CPP_MOD)
                {
                  (--cur_token)->flags |= DIGRAPH;
                  goto make_hash;
                }
            }
          cur_token++;
          break;

        case '&':
          if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
            REVISE_TOKEN (CPP_AND_AND);
          else
            PUSH_TOKEN (CPP_AND);
          break;

        make_or:
        case '|':
          if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
            REVISE_TOKEN (CPP_OR_OR);
          else
            PUSH_TOKEN (CPP_OR);
          break;

        case '+':
          if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
            REVISE_TOKEN (CPP_PLUS_PLUS);
          else
            PUSH_TOKEN (CPP_PLUS);
          break;

        case '=':
          /* This relies on equidistance of "?=" and "?" tokens.  */
          if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
            REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
          else
            PUSH_TOKEN (CPP_EQ);
          break;

        case '>':
          cur_token->type = CPP_GREATER;
          if (IMMED_TOKEN ())
            {
              if (PREV_TOKEN_TYPE == CPP_GREATER)
                BACKUP_TOKEN (CPP_RSHIFT);
              else if (PREV_TOKEN_TYPE == CPP_MINUS)
                BACKUP_TOKEN (CPP_DEREF);
              /* Digraph: ":>" is a ']'  */
              else if (PREV_TOKEN_TYPE == CPP_COLON)
                BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
              /* Digraph: "%>" is a '}'  */
              else if (PREV_TOKEN_TYPE == CPP_MOD)
                BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
            }
          cur_token++;
          break;

        case '<':
          if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
            {
              REVISE_TOKEN (CPP_LSHIFT);
              break;
            }
          /* Is this the beginning of a header name?  */
          if (list->dir_flags & SYNTAX_INCLUDE)
            {
              c = '>';  /* Terminator.  */
              cur_token->type = CPP_HEADER_NAME;
              goto do_parse_string;
            }
          PUSH_TOKEN (CPP_LESS);
          break;

        case '%':
          /* Digraph: "<%" is a '{'  */
          cur_token->type = CPP_MOD;
          if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
            BACKUP_DIGRAPH (CPP_OPEN_BRACE);
          cur_token++;
          break;

        case '(':
          /* Is this the beginning of an assertion string?  */
          if (list->dir_flags & SYNTAX_ASSERT)
            {
              c = ')';  /* Terminator.  */
              cur_token->type = CPP_ASSERTION;
              goto do_parse_string;
            }
          PUSH_TOKEN (CPP_OPEN_PAREN);
          break;

        case '?':
          if (cur + 1 < buffer->rlimit && *cur == '?'
              && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
            {
              /* Handle trigraph.  Skip the second '?' and dispatch on
                 the third character to the handler for the character
                 the trigraph stands for.  */
              cur++;
              switch (*cur++)
                {
                case '(': goto make_open_square;
                case ')': goto make_close_square;
                case '<': goto make_open_brace;
                case '>': goto make_close_brace;
                case '=': goto make_hash;
                case '!': goto make_or;
                case '-': goto make_complement;
                case '/': goto make_backslash;
                case '\'': goto make_xor;
                }
            }
          if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
            {
              /* GNU C++ defines <? and >? operators.  */
              if (PREV_TOKEN_TYPE == CPP_LESS)
                {
                  REVISE_TOKEN (CPP_MIN);
                  break;
                }
              else if (PREV_TOKEN_TYPE == CPP_GREATER)
                {
                  REVISE_TOKEN (CPP_MAX);
                  break;
                }
            }
          PUSH_TOKEN (CPP_QUERY);
          break;

        case '.':
          /* Three dots with no whitespace between them make an
             ellipsis.  */
          if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
              && IMMED_TOKEN ()
              && !(cur_token[-1].flags & PREV_WHITESPACE))
            {
              cur_token -= 2;
              PUSH_TOKEN (CPP_ELLIPSIS);
            }
          else
            PUSH_TOKEN (CPP_DOT);
          break;

        make_complement:
        case '~': PUSH_TOKEN (CPP_COMPL); break;
        make_xor:
        case '^': PUSH_TOKEN (CPP_XOR); break;
        make_open_brace:
        case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
        make_close_brace:
        case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
        make_open_square:
        case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
        make_close_square:
        case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
        make_backslash:
        case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
        case '!': PUSH_TOKEN (CPP_NOT); break;
        case ',': PUSH_TOKEN (CPP_COMMA); break;
        case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
        case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;

        case '$':
          if (CPP_OPTION (pfile, dollars_in_ident))
            goto letter;
          /* Fall through */
        default:
          /* Any other character becomes a CPP_OTHER token carrying
             the character itself.  */
          cur_token->aux = c;
          PUSH_TOKEN (CPP_OTHER);
          break;
        }
    }

  /* Run out of token space?  */
  if (cur_token == token_limit)
    {
      list->tokens_used = cur_token - list->tokens;
      expand_token_space (list);
      goto expanded;
    }

  /* We reached the end of the buffer without seeing a newline.  */
  cur_token->type = CPP_EOF;
  cur_token->flags = flags;

  if (cur_token != &list->tokens[0])
    {
      /* Next call back will get just a CPP_EOF.  For now the line is
         terminated as usual; this replaces the CPP_EOF type stored
         just above.  */
      buffer->cur = cur;
      cpp_warning (pfile, "no newline at end of file");
      PUSH_TOKEN (CPP_VSPACE);
    }

 out:
  buffer->cur = cur;

  list->tokens_used = cur_token - list->tokens;

  /* FIXME:  take this check out and put it in the caller.
     list->directive == 0 indicates an unknown directive (but null
     directive is OK).  This is the first time we can be sure the
     directive is invalid, and thus warn about it, because it might
     have been split by escaped newlines.  Also, don't complain about
     invalid directives in assembly source, we don't know where the
     comments are, and # may introduce assembler pseudo-ops.  */

  if (IS_DIRECTIVE (list) && list->dir_handler == 0
      && list->tokens[1].type != CPP_VSPACE
      && !CPP_OPTION (pfile, lang_asm))
    cpp_error_with_line (pfile, list->line, list->tokens[1].col,
                         "invalid preprocessing directive");
}
3280
3281/* Token spelling functions. Used for output of a preprocessed file,
3282 stringizing and token pasting. They all assume sufficient buffer
3283 is allocated, and return exactly how much they used. */
3284
c5a04734
ZW
3285/* Needs buffer of 3 + len. */
3286unsigned int
3287spell_string (buffer, list, token)
3288 unsigned char *buffer;
3289 cpp_toklist *list;
3290 cpp_token *token;
3291{
cfd5b8b8 3292 unsigned char c, *orig_buff = buffer;
c5a04734
ZW
3293 size_t len;
3294
cfd5b8b8 3295 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
c5a04734 3296 *buffer++ = 'L';
cfd5b8b8
NB
3297 c = token->type == CPP_STRING || token->type == CPP_WSTRING ? '"': '\'';
3298 *buffer++ = c;
c5a04734
ZW
3299
3300 len = token->val.name.len;
3301 memcpy (buffer, TOK_NAME (list, token), len);
3302 buffer += len;
cfd5b8b8 3303 *buffer++ = c;
c5a04734
ZW
3304 return buffer - orig_buff;
3305}
3306
3307/* Needs buffer of len + 2. */
3308unsigned int
3309spell_comment (buffer, list, token)
3310 unsigned char *buffer;
3311 cpp_toklist *list;
3312 cpp_token *token;
3313{
3314 size_t len;
3315
3316 if (token->type == CPP_C_COMMENT)
3317 {
3318 *buffer++ = '/';
3319 *buffer++ = '*';
3320 }
3321 else if (token->type == CPP_CPP_COMMENT)
3322 {
3323 *buffer++ = '/';
3324 *buffer++ = '/';
3325 }
3326 else
3327 {
3328 *buffer++ = '-';
3329 *buffer++ = '-';
3330 }
3331
3332 len = token->val.name.len;
3333 memcpy (buffer, TOK_NAME (list, token), len);
3334
3335 return len + 2;
3336}
3337
3338/* Needs buffer of len. */
3339unsigned int
3340spell_name (buffer, list, token)
3341 unsigned char *buffer;
3342 cpp_toklist *list;
3343 cpp_token *token;
3344{
3345 size_t len;
3346
3347 len = token->val.name.len;
3348 memcpy (buffer, TOK_NAME (list, token), len);
3349 buffer += len;
3350
3351 return len;
3352}
3353
c5a04734
ZW
3354void
3355_cpp_lex_file (pfile)
3356 cpp_reader* pfile;
3357{
3358 int recycle;
3359 cpp_toklist* list;
3360
3361 init_trigraph_map ();
3362 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3363
3364 for (recycle = 0; ;)
3365 {
3366 init_token_list (pfile, list, recycle);
3367 recycle = 1;
3368
3369 _cpp_lex_line (pfile, list);
3370 if (list->tokens[0].type == CPP_EOF)
3371 break;
3372
3373 if (list->dir_handler)
3374 {
3375 if (list->dir_handler (pfile))
3376 {
3377 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3378 recycle = 0;
3379 }
3380 }
3381 else
3382 _cpp_output_list (pfile, list);
3383 }
3384}
3385
cfd5b8b8
NB
/* This could be useful to other routines.  If you allocate this many
   bytes, you have enough room to spell the token.  TOKEN is a pointer
   to a cpp_token; it is evaluated more than once, so it must be free
   of side effects.  */
#define TOKEN_LEN(token) \
  (4 + (token_spellings[(token)->type].type == SPELL_HANDLER \
	? (token)->val.name.len : 0))
3390
c5a04734
ZW
3391static void
3392_cpp_output_list (pfile, list)
3393 cpp_reader *pfile;
3394 cpp_toklist *list;
3395{
3396 unsigned int comment_no = 0;
3397 cpp_token *token, *comment_token = 0;
3398
3399 if (list->comments_used > 0)
3400 comment_token = list->tokens + list->comments[0].aux;
3401
3402 CPP_RESERVE (pfile, 2); /* Always have room for " \n". */
3403 for (token = &list->tokens[0];; token++)
3404 {
3405 if (token->flags & PREV_WHITESPACE)
3406 {
3407 /* Output comments if -C. Otherwise a space will do. */
3408 if (token == comment_token)
3409 {
3410 cpp_token *comment = &list->comments[comment_no];
3411 do
3412 {
cfd5b8b8 3413 CPP_RESERVE (pfile, 2 + TOKEN_LEN (comment));
c5a04734
ZW
3414 pfile->limit += spell_comment (pfile->limit, list, comment);
3415 comment_no++, comment++;
3416 if (comment_no == list->comments_used)
3417 break;
3418 comment_token = comment->aux + list->tokens;
3419 }
3420 while (comment_token == token);
3421 }
3422 else
3423 CPP_PUTC_Q (pfile, ' ');
3424 }
3425
cfd5b8b8 3426 CPP_RESERVE (pfile, 2 + TOKEN_LEN (token));
c5a04734
ZW
3427 switch (token_spellings[token->type].type)
3428 {
3429 case SPELL_TEXT:
3430 {
3431 const unsigned char *spelling;
3432 unsigned char c;
3433
c5a04734 3434 if (token->flags & DIGRAPH)
cfd5b8b8 3435 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
c5a04734
ZW
3436 else
3437 spelling = token_spellings[token->type].speller;
3438
3439 while ((c = *spelling++) != '\0')
3440 CPP_PUTC_Q (pfile, c);
3441 }
3442 break;
3443
3444 case SPELL_HANDLER:
3445 {
3446 speller s;
3447
3448 s = (speller) token_spellings[token->type].speller;
c5a04734
ZW
3449 pfile->limit += s (pfile->limit, list, token);
3450 }
3451 break;
3452
cfd5b8b8
NB
3453 case SPELL_CHAR:
3454 *pfile->limit++ = token->aux;
3455 break;
3456
c5a04734
ZW
3457 case SPELL_EOL:
3458 CPP_PUTC_Q (pfile, '\n');
3459 return;
3460
3461 case SPELL_NONE:
3462 cpp_error (pfile, "Unwriteable token");
3463 break;
3464 }
3465 }
3466}
3467
3468#endif
This page took 0.430957 seconds and 5 git commands to generate.