]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
re PR preprocessor/12847 (xxx.c:1:20: xxxx.h: No such file or directory)
[gcc.git] / gcc / cpplex.c
CommitLineData
/* CPP Library - lexical analysis.
   Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   Contributed by Per Bothner, 1994-95.
   Based on CCCP program by Paul Rubin, June 1986
   Adapted to ANSI C, Richard Stallman, Jan 1987
   Broken out to separate file, Zack Weinberg, Mar 2000

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
21
22#include "config.h"
23#include "system.h"
45b966db
ZW
24#include "cpplib.h"
25#include "cpphash.h"
26
/* Spelling category of a token type, stored in the `category' field
   of struct token_spelling.  OP entries of TTYPE_TABLE are always
   SPELL_OPERATOR; TK entries supply their own category.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
34
93c80368 35struct token_spelling
f9a0e96c 36{
93c80368
NB
37 enum spell_type category;
38 const unsigned char *name;
f9a0e96c
ZW
39};
40
8206c799
ZW
41static const unsigned char *const digraph_spellings[] =
42{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
93c80368
NB
43
44#define OP(e, s) { SPELL_OPERATOR, U s },
9a238586 45#define TK(e, s) { s, U #e },
8206c799 46static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
93c80368
NB
47#undef OP
48#undef TK
49
50#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
f2d5f0cc 52
6cf87ca4
ZW
53static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54static int skip_line_comment (cpp_reader *);
55static void skip_whitespace (cpp_reader *, cppchar_t);
56static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
57static void lex_number (cpp_reader *, cpp_string *);
58static bool forms_identifier_p (cpp_reader *, int);
59static void lex_string (cpp_reader *, cpp_token *, const uchar *);
60static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
61static void create_literal (cpp_reader *, cpp_token *, const uchar *,
62 unsigned int, enum cpp_ttype);
63static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
64static int name_p (cpp_reader *, const cpp_string *);
6cf87ca4
ZW
65static tokenrun *next_tokenrun (tokenrun *);
66
6cf87ca4 67static _cpp_buff *new_buff (size_t);
15dad1d9 68
9d10c9a9 69
041c3194 70/* Utility routine:
9e62c811 71
bfb9dc7f
ZW
72 Compares, the token TOKEN to the NUL-terminated string STRING.
73 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
041c3194 74int
6cf87ca4 75cpp_ideq (const cpp_token *token, const char *string)
041c3194 76{
bfb9dc7f 77 if (token->type != CPP_NAME)
041c3194 78 return 0;
bfb9dc7f 79
562a5c27 80 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
15dad1d9 81}
1368ee70 82
26aea073
NB
83/* Record a note TYPE at byte POS into the current cleaned logical
84 line. */
87062813 85static void
6cf87ca4 86add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
0d9f234d 87{
26aea073
NB
88 if (buffer->notes_used == buffer->notes_cap)
89 {
90 buffer->notes_cap = buffer->notes_cap * 2 + 200;
703ad42b
KG
91 buffer->notes = xrealloc (buffer->notes,
92 buffer->notes_cap * sizeof (_cpp_line_note));
26aea073 93 }
0d9f234d 94
26aea073
NB
95 buffer->notes[buffer->notes_used].pos = pos;
96 buffer->notes[buffer->notes_used].type = type;
97 buffer->notes_used++;
0d9f234d
NB
98}
99
26aea073
NB
100/* Returns with a logical line that contains no escaped newlines or
101 trigraphs. This is a time-critical inner loop. */
102void
6cf87ca4 103_cpp_clean_line (cpp_reader *pfile)
45b966db 104{
26aea073
NB
105 cpp_buffer *buffer;
106 const uchar *s;
107 uchar c, *d, *p;
87062813 108
26aea073
NB
109 buffer = pfile->buffer;
110 buffer->cur_note = buffer->notes_used = 0;
111 buffer->cur = buffer->line_base = buffer->next_line;
112 buffer->need_line = false;
113 s = buffer->next_line - 1;
87062813 114
26aea073 115 if (!buffer->from_stage3)
45b966db 116 {
d08dcf87
ZW
117 /* Short circuit for the common case of an un-escaped line with
118 no trigraphs. The primary win here is by not writing any
119 data back to memory until we have to. */
120 for (;;)
121 {
122 c = *++s;
123 if (c == '\n' || c == '\r')
124 {
125 d = (uchar *) s;
126
127 if (s == buffer->rlimit)
128 goto done;
129
130 /* DOS line ending? */
131 if (c == '\r' && s[1] == '\n')
132 s++;
133
134 if (s == buffer->rlimit)
135 goto done;
136
137 /* check for escaped newline */
138 p = d;
139 while (p != buffer->next_line && is_nvspace (p[-1]))
140 p--;
141 if (p == buffer->next_line || p[-1] != '\\')
142 goto done;
143
144 /* Have an escaped newline; process it and proceed to
145 the slow path. */
146 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
147 d = p - 2;
148 buffer->next_line = p - 1;
149 break;
150 }
151 if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
152 {
153 /* Have a trigraph. We may or may not have to convert
154 it. Add a line note regardless, for -Wtrigraphs. */
155 add_line_note (buffer, s, s[2]);
156 if (CPP_OPTION (pfile, trigraphs))
157 {
158 /* We do, and that means we have to switch to the
159 slow path. */
160 d = (uchar *) s;
161 *d = _cpp_trigraph_map[s[2]];
162 s += 2;
163 break;
164 }
165 }
166 }
167
26aea073
NB
168
169 for (;;)
4a5b68a2 170 {
26aea073
NB
171 c = *++s;
172 *++d = c;
173
174 if (c == '\n' || c == '\r')
175 {
176 /* Handle DOS line endings. */
177 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
178 s++;
179 if (s == buffer->rlimit)
180 break;
181
182 /* Escaped? */
183 p = d;
184 while (p != buffer->next_line && is_nvspace (p[-1]))
185 p--;
186 if (p == buffer->next_line || p[-1] != '\\')
187 break;
188
41c32c98 189 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
26aea073
NB
190 d = p - 2;
191 buffer->next_line = p - 1;
192 }
193 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
194 {
195 /* Add a note regardless, for the benefit of -Wtrigraphs. */
41c32c98 196 add_line_note (buffer, d, s[2]);
26aea073
NB
197 if (CPP_OPTION (pfile, trigraphs))
198 {
199 *d = _cpp_trigraph_map[s[2]];
200 s += 2;
201 }
202 }
4a5b68a2 203 }
45b966db 204 }
26aea073
NB
205 else
206 {
207 do
208 s++;
209 while (*s != '\n' && *s != '\r');
210 d = (uchar *) s;
211
212 /* Handle DOS line endings. */
213 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
214 s++;
215 }
0d9f234d 216
d08dcf87 217 done:
26aea073 218 *d = '\n';
41c32c98
NB
219 /* A sentinel note that should never be processed. */
220 add_line_note (buffer, d + 1, '\n');
26aea073 221 buffer->next_line = s + 1;
45b966db
ZW
222}
223
a8eb6044
NB
224/* Return true if the trigraph indicated by NOTE should be warned
225 about in a comment. */
226static bool
6cf87ca4 227warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
a8eb6044
NB
228{
229 const uchar *p;
230
231 /* Within comments we don't warn about trigraphs, unless the
232 trigraph forms an escaped newline, as that may change
6356f892 233 behavior. */
a8eb6044
NB
234 if (note->type != '/')
235 return false;
236
237 /* If -trigraphs, then this was an escaped newline iff the next note
238 is coincident. */
239 if (CPP_OPTION (pfile, trigraphs))
240 return note[1].pos == note->pos;
241
242 /* Otherwise, see if this forms an escaped newline. */
243 p = note->pos + 3;
244 while (is_nvspace (*p))
245 p++;
246
247 /* There might have been escaped newlines between the trigraph and the
248 newline we found. Hence the position test. */
249 return (*p == '\n' && p < note[1].pos);
250}
251
26aea073
NB
252/* Process the notes created by add_line_note as far as the current
253 location. */
254void
6cf87ca4 255_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
45b966db 256{
29401c30
NB
257 cpp_buffer *buffer = pfile->buffer;
258
26aea073 259 for (;;)
041c3194 260 {
26aea073
NB
261 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
262 unsigned int col;
a5c3cccd 263
26aea073
NB
264 if (note->pos > buffer->cur)
265 break;
a5c3cccd 266
26aea073
NB
267 buffer->cur_note++;
268 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
4d6baafa 269
41c32c98 270 if (note->type == '\\' || note->type == ' ')
26aea073 271 {
41c32c98 272 if (note->type == ' ' && !in_comment)
0527bc4e 273 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line, col,
26aea073 274 "backslash and newline separated by space");
41c32c98 275
26aea073 276 if (buffer->next_line > buffer->rlimit)
87062813 277 {
0527bc4e 278 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line, col,
26aea073
NB
279 "backslash-newline at end of file");
280 /* Prevent "no newline at end of file" warning. */
281 buffer->next_line = buffer->rlimit;
87062813 282 }
26aea073
NB
283
284 buffer->line_base = note->pos;
285 pfile->line++;
0d9f234d 286 }
41c32c98
NB
287 else if (_cpp_trigraph_map[note->type])
288 {
a8eb6044
NB
289 if (CPP_OPTION (pfile, warn_trigraphs)
290 && (!in_comment || warn_in_comment (pfile, note)))
41c32c98
NB
291 {
292 if (CPP_OPTION (pfile, trigraphs))
0527bc4e 293 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line, col,
41c32c98
NB
294 "trigraph ??%c converted to %c",
295 note->type,
296 (int) _cpp_trigraph_map[note->type]);
297 else
905bd7b5
GK
298 {
299 cpp_error_with_line
0527bc4e 300 (pfile, CPP_DL_WARNING, pfile->line, col,
905bd7b5
GK
301 "trigraph ??%c ignored, use -trigraphs to enable",
302 note->type);
303 }
41c32c98
NB
304 }
305 }
306 else
307 abort ();
041c3194 308 }
45b966db
ZW
309}
310
0d9f234d
NB
311/* Skip a C-style block comment. We find the end of the comment by
312 seeing if an asterisk is before every '/' we encounter. Returns
6f572ac2
NB
313 nonzero if comment terminated by EOF, zero otherwise.
314
315 Buffer->cur points to the initial asterisk of the comment. */
26aea073 316bool
6cf87ca4 317_cpp_skip_block_comment (cpp_reader *pfile)
45b966db 318{
041c3194 319 cpp_buffer *buffer = pfile->buffer;
d08dcf87
ZW
320 const uchar *cur = buffer->cur;
321 uchar c;
0d9f234d 322
d08dcf87
ZW
323 cur++;
324 if (*cur == '/')
325 cur++;
0d9f234d 326
26aea073
NB
327 for (;;)
328 {
0d9f234d
NB
329 /* People like decorating comments with '*', so check for '/'
330 instead for efficiency. */
d08dcf87
ZW
331 c = *cur++;
332
041c3194 333 if (c == '/')
45b966db 334 {
d08dcf87 335 if (cur[-2] == '*')
0d9f234d 336 break;
041c3194 337
0d9f234d 338 /* Warn about potential nested comments, but not if the '/'
a1f300c0 339 comes immediately before the true comment delimiter.
041c3194 340 Don't bother to get it right across escaped newlines. */
0d9f234d 341 if (CPP_OPTION (pfile, warn_comments)
d08dcf87
ZW
342 && cur[0] == '*' && cur[1] != '/')
343 {
344 buffer->cur = cur;
0527bc4e 345 cpp_error_with_line (pfile, CPP_DL_WARNING,
d08dcf87
ZW
346 pfile->line, CPP_BUF_COL (buffer),
347 "\"/*\" within comment");
348 }
45b966db 349 }
26aea073
NB
350 else if (c == '\n')
351 {
d08dcf87 352 buffer->cur = cur - 1;
26aea073
NB
353 _cpp_process_line_notes (pfile, true);
354 if (buffer->next_line >= buffer->rlimit)
355 return true;
356 _cpp_clean_line (pfile);
357 pfile->line++;
d08dcf87 358 cur = buffer->cur;
26aea073 359 }
45b966db 360 }
041c3194 361
d08dcf87 362 buffer->cur = cur;
a8eb6044 363 _cpp_process_line_notes (pfile, true);
26aea073 364 return false;
45b966db
ZW
365}
366
480709cc 367/* Skip a C++ line comment, leaving buffer->cur pointing to the
da7d8304 368 terminating newline. Handles escaped newlines. Returns nonzero
480709cc 369 if a multiline comment. */
041c3194 370static int
6cf87ca4 371skip_line_comment (cpp_reader *pfile)
45b966db 372{
cbcff6df 373 cpp_buffer *buffer = pfile->buffer;
67821e3a 374 unsigned int orig_line = pfile->line;
041c3194 375
26aea073
NB
376 while (*buffer->cur != '\n')
377 buffer->cur++;
480709cc 378
26aea073 379 _cpp_process_line_notes (pfile, true);
67821e3a 380 return orig_line != pfile->line;
041c3194 381}
45b966db 382
26aea073 383/* Skips whitespace, saving the next non-whitespace character. */
52fadca8 384static void
6cf87ca4 385skip_whitespace (cpp_reader *pfile, cppchar_t c)
041c3194
ZW
386{
387 cpp_buffer *buffer = pfile->buffer;
f7d151fb 388 bool saw_NUL = false;
45b966db 389
0d9f234d 390 do
041c3194 391 {
91fcd158 392 /* Horizontal space always OK. */
26aea073 393 if (c == ' ' || c == '\t')
0d9f234d 394 ;
0d9f234d 395 /* Just \f \v or \0 left. */
91fcd158 396 else if (c == '\0')
f7d151fb 397 saw_NUL = true;
93c80368 398 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
0527bc4e 399 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line,
ebef4e8c
NB
400 CPP_BUF_COL (buffer),
401 "%s in preprocessing directive",
402 c == '\f' ? "form feed" : "vertical tab");
0d9f234d 403
0d9f234d 404 c = *buffer->cur++;
45b966db 405 }
ec5c56db 406 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
0d9f234d
NB
407 while (is_nvspace (c));
408
f7d151fb 409 if (saw_NUL)
0527bc4e 410 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
f7d151fb 411
480709cc 412 buffer->cur--;
041c3194 413}
45b966db 414
93c80368
NB
415/* See if the characters of a number token are valid in a name (no
416 '.', '+' or '-'). */
417static int
6cf87ca4 418name_p (cpp_reader *pfile, const cpp_string *string)
93c80368
NB
419{
420 unsigned int i;
421
422 for (i = 0; i < string->len; i++)
423 if (!is_idchar (string->text[i]))
424 return 0;
425
df383483 426 return 1;
93c80368
NB
427}
428
bced6edf 429/* Returns TRUE if the sequence starting at buffer->cur is invalid in
1613e52b 430 an identifier. FIRST is TRUE if this starts an identifier. */
bced6edf 431static bool
6cf87ca4 432forms_identifier_p (cpp_reader *pfile, int first)
bced6edf 433{
1613e52b
NB
434 cpp_buffer *buffer = pfile->buffer;
435
436 if (*buffer->cur == '$')
437 {
438 if (!CPP_OPTION (pfile, dollars_in_ident))
439 return false;
440
441 buffer->cur++;
78b8811a 442 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1613e52b 443 {
78b8811a 444 CPP_OPTION (pfile, warn_dollars) = 0;
0527bc4e 445 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1613e52b
NB
446 }
447
448 return true;
449 }
bced6edf 450
1613e52b
NB
451 /* Is this a syntactically valid UCN? */
452 if (0 && *buffer->cur == '\\'
453 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
bced6edf 454 {
1613e52b 455 buffer->cur += 2;
e6cc3a24 456 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
1613e52b
NB
457 return true;
458 buffer->cur -= 2;
bced6edf 459 }
bced6edf 460
1613e52b 461 return false;
bced6edf
NB
462}
463
464/* Lex an identifier starting at BUFFER->CUR - 1. */
0d9f234d 465static cpp_hashnode *
6cf87ca4 466lex_identifier (cpp_reader *pfile, const uchar *base)
45b966db 467{
93c80368 468 cpp_hashnode *result;
1613e52b 469 const uchar *cur;
2c3fcba6 470
bced6edf 471 do
10cf9bde 472 {
bced6edf
NB
473 cur = pfile->buffer->cur;
474
475 /* N.B. ISIDNUM does not include $. */
476 while (ISIDNUM (*cur))
477 cur++;
10cf9bde 478
10cf9bde 479 pfile->buffer->cur = cur;
2c3fcba6 480 }
1613e52b 481 while (forms_identifier_p (pfile, false));
bced6edf
NB
482
483 result = (cpp_hashnode *)
484 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
2c3fcba6 485
bced6edf 486 /* Rarely, identifiers require diagnostics when lexed. */
2c3fcba6
ZW
487 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
488 && !pfile->state.skipping, 0))
489 {
490 /* It is allowed to poison the same identifier twice. */
491 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
0527bc4e 492 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
2c3fcba6
ZW
493 NODE_NAME (result));
494
495 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
496 replacement list of a variadic macro. */
497 if (result == pfile->spec_nodes.n__VA_ARGS__
498 && !pfile->state.va_args_ok)
0527bc4e 499 cpp_error (pfile, CPP_DL_PEDWARN,
6cf87ca4
ZW
500 "__VA_ARGS__ can only appear in the expansion"
501 " of a C99 variadic macro");
2c3fcba6
ZW
502 }
503
504 return result;
505}
506
bced6edf 507/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
45b966db 508static void
6cf87ca4 509lex_number (cpp_reader *pfile, cpp_string *number)
45b966db 510{
562a5c27 511 const uchar *cur;
bced6edf
NB
512 const uchar *base;
513 uchar *dest;
45b966db 514
bced6edf
NB
515 base = pfile->buffer->cur - 1;
516 do
041c3194 517 {
bced6edf 518 cur = pfile->buffer->cur;
0d9f234d 519
bced6edf
NB
520 /* N.B. ISIDNUM does not include $. */
521 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
522 cur++;
45b966db 523
10cf9bde 524 pfile->buffer->cur = cur;
45b966db 525 }
1613e52b 526 while (forms_identifier_p (pfile, false));
93c80368 527
bced6edf
NB
528 number->len = cur - base;
529 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
530 memcpy (dest, base, number->len);
531 dest[number->len] = '\0';
532 number->text = dest;
93c80368
NB
533}
534
6338b358
NB
535/* Create a token of type TYPE with a literal spelling. */
536static void
6cf87ca4
ZW
537create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
538 unsigned int len, enum cpp_ttype type)
6338b358
NB
539{
540 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
541
542 memcpy (dest, base, len);
543 dest[len] = '\0';
544 token->type = type;
545 token->val.str.len = len;
546 token->val.str.text = dest;
547}
548
bced6edf 549/* Lexes a string, character constant, or angle-bracketed header file
6338b358
NB
550 name. The stored string contains the spelling, including opening
551 quote and leading any leading 'L'. It returns the type of the
552 literal, or CPP_OTHER if it was not properly terminated.
553
554 The spelling is NUL-terminated, but it is not guaranteed that this
555 is the first NUL since embedded NULs are preserved. */
041c3194 556static void
6cf87ca4 557lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
45b966db 558{
6338b358
NB
559 bool saw_NUL = false;
560 const uchar *cur;
bced6edf 561 cppchar_t terminator;
6338b358
NB
562 enum cpp_ttype type;
563
564 cur = base;
565 terminator = *cur++;
566 if (terminator == 'L')
567 terminator = *cur++;
568 if (terminator == '\"')
569 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
570 else if (terminator == '\'')
571 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
572 else
573 terminator = '>', type = CPP_HEADER_NAME;
93c80368 574
0d9f234d 575 for (;;)
45b966db 576 {
6338b358 577 cppchar_t c = *cur++;
7868b4a2 578
6f572ac2 579 /* In #include-style directives, terminators are not escapable. */
6338b358
NB
580 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
581 cur++;
582 else if (c == terminator)
bced6edf 583 break;
6338b358 584 else if (c == '\n')
0d9f234d 585 {
6338b358
NB
586 cur--;
587 type = CPP_OTHER;
588 break;
45b966db 589 }
6338b358
NB
590 else if (c == '\0')
591 saw_NUL = true;
45b966db
ZW
592 }
593
6338b358 594 if (saw_NUL && !pfile->state.skipping)
0527bc4e
JDA
595 cpp_error (pfile, CPP_DL_WARNING,
596 "null character(s) preserved in literal");
45b966db 597
6338b358
NB
598 pfile->buffer->cur = cur;
599 create_literal (pfile, token, base, cur - base, type);
0d9f234d 600}
041c3194 601
93c80368 602/* The stored comment includes the comment start and any terminator. */
9e62c811 603static void
6cf87ca4
ZW
604save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
605 cppchar_t type)
9e62c811 606{
041c3194 607 unsigned char *buffer;
477cdac7 608 unsigned int len, clen;
df383483 609
1c6d33ef 610 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
480709cc 611
3542203b
NB
612 /* C++ comments probably (not definitely) have moved past a new
613 line, which we don't want to save in the comment. */
480709cc 614 if (is_vspace (pfile->buffer->cur[-1]))
3542203b 615 len--;
477cdac7
JT
616
617 /* If we are currently in a directive, then we need to store all
618 C++ comments as C comments internally, and so we need to
619 allocate a little extra space in that case.
620
621 Note that the only time we encounter a directive here is
622 when we are saving comments in a "#define". */
623 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
624
625 buffer = _cpp_unaligned_alloc (pfile, clen);
df383483 626
041c3194 627 token->type = CPP_COMMENT;
477cdac7 628 token->val.str.len = clen;
0d9f234d 629 token->val.str.text = buffer;
45b966db 630
1c6d33ef
NB
631 buffer[0] = '/';
632 memcpy (buffer + 1, from, len - 1);
477cdac7 633
1eeeb6a4 634 /* Finish conversion to a C comment, if necessary. */
477cdac7
JT
635 if (pfile->state.in_directive && type == '/')
636 {
637 buffer[1] = '*';
638 buffer[clen - 2] = '*';
639 buffer[clen - 1] = '/';
640 }
0d9f234d 641}
45b966db 642
5fddcffc
NB
643/* Allocate COUNT tokens for RUN. */
644void
6cf87ca4 645_cpp_init_tokenrun (tokenrun *run, unsigned int count)
5fddcffc
NB
646{
647 run->base = xnewvec (cpp_token, count);
648 run->limit = run->base + count;
649 run->next = NULL;
650}
651
652/* Returns the next tokenrun, or creates one if there is none. */
653static tokenrun *
6cf87ca4 654next_tokenrun (tokenrun *run)
5fddcffc
NB
655{
656 if (run->next == NULL)
657 {
658 run->next = xnew (tokenrun);
bdcbe496 659 run->next->prev = run;
5fddcffc
NB
660 _cpp_init_tokenrun (run->next, 250);
661 }
662
663 return run->next;
664}
665
4ed5bcfb
NB
666/* Allocate a single token that is invalidated at the same time as the
667 rest of the tokens on the line. Has its line and col set to the
668 same as the last lexed token, so that diagnostics appear in the
669 right place. */
670cpp_token *
6cf87ca4 671_cpp_temp_token (cpp_reader *pfile)
4ed5bcfb
NB
672{
673 cpp_token *old, *result;
674
675 old = pfile->cur_token - 1;
676 if (pfile->cur_token == pfile->cur_run->limit)
677 {
678 pfile->cur_run = next_tokenrun (pfile->cur_run);
679 pfile->cur_token = pfile->cur_run->base;
680 }
681
682 result = pfile->cur_token++;
683 result->line = old->line;
684 result->col = old->col;
685 return result;
686}
687
14baae01
NB
688/* Lex a token into RESULT (external interface). Takes care of issues
689 like directive handling, token lookahead, multiple include
a1f300c0 690 optimization and skipping. */
345894b4 691const cpp_token *
6cf87ca4 692_cpp_lex_token (cpp_reader *pfile)
5fddcffc 693{
bdcbe496 694 cpp_token *result;
5fddcffc 695
bdcbe496 696 for (;;)
5fddcffc 697 {
bdcbe496 698 if (pfile->cur_token == pfile->cur_run->limit)
5fddcffc 699 {
bdcbe496
NB
700 pfile->cur_run = next_tokenrun (pfile->cur_run);
701 pfile->cur_token = pfile->cur_run->base;
5fddcffc
NB
702 }
703
bdcbe496 704 if (pfile->lookaheads)
14baae01
NB
705 {
706 pfile->lookaheads--;
707 result = pfile->cur_token++;
708 }
bdcbe496 709 else
14baae01 710 result = _cpp_lex_direct (pfile);
bdcbe496
NB
711
712 if (result->flags & BOL)
5fddcffc 713 {
bdcbe496
NB
714 /* Is this a directive. If _cpp_handle_directive returns
715 false, it is an assembler #. */
716 if (result->type == CPP_HASH
e808ec9c
NB
717 /* 6.10.3 p 11: Directives in a list of macro arguments
718 gives undefined behavior. This implementation
719 handles the directive as normal. */
720 && pfile->state.parsing_args != 1
bdcbe496
NB
721 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
722 continue;
97293897 723 if (pfile->cb.line_change && !pfile->state.skipping)
6cf87ca4 724 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
5fddcffc 725 }
5fddcffc 726
bdcbe496
NB
727 /* We don't skip tokens in directives. */
728 if (pfile->state.in_directive)
729 break;
5fddcffc 730
bdcbe496 731 /* Outside a directive, invalidate controlling macros. At file
14baae01 732 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
6356f892 733 get here and MI optimization works. */
5fddcffc 734 pfile->mi_valid = false;
bdcbe496
NB
735
736 if (!pfile->state.skipping || result->type == CPP_EOF)
737 break;
5fddcffc
NB
738 }
739
345894b4 740 return result;
5fddcffc
NB
741}
742
26aea073
NB
743/* Returns true if a fresh line has been loaded. */
744bool
6cf87ca4 745_cpp_get_fresh_line (cpp_reader *pfile)
004cb263 746{
26aea073
NB
747 /* We can't get a new line until we leave the current directive. */
748 if (pfile->state.in_directive)
749 return false;
df383483 750
26aea073 751 for (;;)
1a76916c 752 {
26aea073 753 cpp_buffer *buffer = pfile->buffer;
1a76916c 754
26aea073
NB
755 if (!buffer->need_line)
756 return true;
757
758 if (buffer->next_line < buffer->rlimit)
004cb263 759 {
26aea073
NB
760 _cpp_clean_line (pfile);
761 return true;
762 }
004cb263 763
26aea073
NB
764 /* First, get out of parsing arguments state. */
765 if (pfile->state.parsing_args)
766 return false;
767
768 /* End of buffer. Non-empty files should end in a newline. */
769 if (buffer->buf != buffer->rlimit
770 && buffer->next_line > buffer->rlimit
771 && !buffer->from_stage3)
772 {
773 /* Only warn once. */
774 buffer->next_line = buffer->rlimit;
0527bc4e 775 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line - 1,
26aea073
NB
776 CPP_BUF_COLUMN (buffer, buffer->cur),
777 "no newline at end of file");
778 }
779
26aea073 780 _cpp_pop_buffer (pfile);
a506c55c
PB
781 if (pfile->buffer == NULL)
782 return false;
26aea073 783 }
004cb263
NB
784}
785
/* Set result->type to ELSE_TYPE, unless the next character in the
   buffer is CHAR, in which case consume it and set result->type to
   THEN_TYPE.  Relies on `result' and `buffer' being in scope where
   the macro is used.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)	\
  do						\
    {						\
      result->type = ELSE_TYPE;			\
      if (*buffer->cur == CHAR)			\
	buffer->cur++, result->type = THEN_TYPE;	\
    }						\
  while (0)
480709cc 794
14baae01
NB
795/* Lex a token into pfile->cur_token, which is also incremented, to
796 get diagnostics pointing to the correct location.
797
798 Does not handle issues such as token lookahead, multiple-include
f1ba665b 799 optimization, directives, skipping etc. This function is only
14baae01
NB
800 suitable for use by _cpp_lex_token, and in special cases like
801 lex_expansion_token which doesn't care for any of these issues.
802
803 When meeting a newline, returns CPP_EOF if parsing a directive,
804 otherwise returns to the start of the token buffer if permissible.
805 Returns the location of the lexed token. */
806cpp_token *
6cf87ca4 807_cpp_lex_direct (cpp_reader *pfile)
45b966db 808{
0d9f234d 809 cppchar_t c;
adb84b42 810 cpp_buffer *buffer;
0d9f234d 811 const unsigned char *comment_start;
14baae01 812 cpp_token *result = pfile->cur_token++;
9ec7291f 813
5fddcffc 814 fresh_line:
26aea073 815 result->flags = 0;
2be570f9 816 buffer = pfile->buffer;
a506c55c 817 if (buffer->need_line)
26aea073
NB
818 {
819 if (!_cpp_get_fresh_line (pfile))
820 {
821 result->type = CPP_EOF;
9ff7868d
NB
822 if (!pfile->state.in_directive)
823 {
824 /* Tell the compiler the line number of the EOF token. */
825 result->line = pfile->line;
826 result->flags = BOL;
827 }
26aea073
NB
828 return result;
829 }
830 if (!pfile->keep_tokens)
831 {
832 pfile->cur_run = &pfile->base_run;
833 result = pfile->base_run.base;
834 pfile->cur_token = result + 1;
835 }
836 result->flags = BOL;
837 if (pfile->state.parsing_args == 2)
838 result->flags |= PREV_WHITE;
839 }
a506c55c 840 buffer = pfile->buffer;
5fddcffc 841 update_tokens_line:
1444f2ed 842 result->line = pfile->line;
041c3194 843
5fddcffc 844 skipped_white:
26aea073
NB
845 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
846 && !pfile->overlaid_buffer)
847 {
848 _cpp_process_line_notes (pfile, false);
849 result->line = pfile->line;
850 }
480709cc 851 c = *buffer->cur++;
5fddcffc 852 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
5fddcffc 853
0d9f234d 854 switch (c)
45b966db 855 {
4d6baafa
NB
856 case ' ': case '\t': case '\f': case '\v': case '\0':
857 result->flags |= PREV_WHITE;
26aea073
NB
858 skip_whitespace (pfile, c);
859 goto skipped_white;
0d9f234d 860
26aea073
NB
861 case '\n':
862 pfile->line++;
863 buffer->need_line = true;
864 goto fresh_line;
46d07497 865
0d9f234d
NB
866 case '0': case '1': case '2': case '3': case '4':
867 case '5': case '6': case '7': case '8': case '9':
868 result->type = CPP_NUMBER;
bced6edf 869 lex_number (pfile, &result->val.str);
0d9f234d 870 break;
46d07497 871
0abc6a6a
NB
872 case 'L':
873 /* 'L' may introduce wide characters or strings. */
bced6edf
NB
874 if (*buffer->cur == '\'' || *buffer->cur == '"')
875 {
6338b358 876 lex_string (pfile, result, buffer->cur - 1);
bced6edf
NB
877 break;
878 }
df383483 879 /* Fall through. */
0abc6a6a 880
0d9f234d
NB
881 case '_':
882 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
883 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
884 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
885 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
886 case 'y': case 'z':
887 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
0abc6a6a 888 case 'G': case 'H': case 'I': case 'J': case 'K':
0d9f234d
NB
889 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
890 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
891 case 'Y': case 'Z':
892 result->type = CPP_NAME;
1613e52b 893 result->val.node = lex_identifier (pfile, buffer->cur - 1);
0d9f234d 894
0d9f234d 895 /* Convert named operators to their proper types. */
0abc6a6a 896 if (result->val.node->flags & NODE_OPERATOR)
0d9f234d
NB
897 {
898 result->flags |= NAMED_OP;
4977bab6 899 result->type = result->val.node->directive_index;
0d9f234d
NB
900 }
901 break;
902
903 case '\'':
904 case '"':
6338b358 905 lex_string (pfile, result, buffer->cur - 1);
0d9f234d 906 break;
041c3194 907
0d9f234d 908 case '/':
1c6d33ef
NB
909 /* A potential block or line comment. */
910 comment_start = buffer->cur;
6f572ac2
NB
911 c = *buffer->cur;
912
1c6d33ef
NB
913 if (c == '*')
914 {
26aea073 915 if (_cpp_skip_block_comment (pfile))
0527bc4e 916 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
0d9f234d 917 }
480709cc
NB
918 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
919 || CPP_IN_SYSTEM_HEADER (pfile)))
0d9f234d 920 {
bdb05a7b
NB
921 /* Warn about comments only if pedantically GNUC89, and not
922 in system headers. */
923 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 924 && ! buffer->warned_cplusplus_comments)
041c3194 925 {
0527bc4e 926 cpp_error (pfile, CPP_DL_PEDWARN,
56508306 927 "C++ style comments are not allowed in ISO C90");
0527bc4e 928 cpp_error (pfile, CPP_DL_PEDWARN,
ebef4e8c 929 "(this will be reported only once per input file)");
1c6d33ef
NB
930 buffer->warned_cplusplus_comments = 1;
931 }
0d9f234d 932
01ef6563 933 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
0527bc4e 934 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1c6d33ef 935 }
480709cc
NB
936 else if (c == '=')
937 {
6f572ac2 938 buffer->cur++;
480709cc
NB
939 result->type = CPP_DIV_EQ;
940 break;
941 }
942 else
943 {
480709cc
NB
944 result->type = CPP_DIV;
945 break;
946 }
0d9f234d 947
1c6d33ef
NB
948 if (!pfile->state.save_comments)
949 {
950 result->flags |= PREV_WHITE;
5fddcffc 951 goto update_tokens_line;
0d9f234d 952 }
1c6d33ef
NB
953
954 /* Save the comment as a token in its own right. */
477cdac7 955 save_comment (pfile, result, comment_start, c);
bdcbe496 956 break;
0d9f234d
NB
957
958 case '<':
959 if (pfile->state.angled_headers)
960 {
6338b358 961 lex_string (pfile, result, buffer->cur - 1);
480709cc 962 break;
0d9f234d 963 }
45b966db 964
6f572ac2
NB
965 result->type = CPP_LESS;
966 if (*buffer->cur == '=')
967 buffer->cur++, result->type = CPP_LESS_EQ;
968 else if (*buffer->cur == '<')
0d9f234d 969 {
6f572ac2
NB
970 buffer->cur++;
971 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
0d9f234d 972 }
6f572ac2 973 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
0d9f234d 974 {
6f572ac2
NB
975 buffer->cur++;
976 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
0d9f234d 977 }
6f572ac2 978 else if (CPP_OPTION (pfile, digraphs))
480709cc 979 {
6f572ac2
NB
980 if (*buffer->cur == ':')
981 {
982 buffer->cur++;
983 result->flags |= DIGRAPH;
984 result->type = CPP_OPEN_SQUARE;
985 }
986 else if (*buffer->cur == '%')
987 {
988 buffer->cur++;
989 result->flags |= DIGRAPH;
990 result->type = CPP_OPEN_BRACE;
991 }
480709cc 992 }
0d9f234d
NB
993 break;
994
995 case '>':
6f572ac2
NB
996 result->type = CPP_GREATER;
997 if (*buffer->cur == '=')
998 buffer->cur++, result->type = CPP_GREATER_EQ;
999 else if (*buffer->cur == '>')
0d9f234d 1000 {
6f572ac2
NB
1001 buffer->cur++;
1002 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1003 }
1004 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1005 {
1006 buffer->cur++;
1007 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
0d9f234d
NB
1008 }
1009 break;
1010
cbcff6df 1011 case '%':
6f572ac2
NB
1012 result->type = CPP_MOD;
1013 if (*buffer->cur == '=')
1014 buffer->cur++, result->type = CPP_MOD_EQ;
1015 else if (CPP_OPTION (pfile, digraphs))
480709cc 1016 {
6f572ac2 1017 if (*buffer->cur == ':')
480709cc 1018 {
6f572ac2
NB
1019 buffer->cur++;
1020 result->flags |= DIGRAPH;
1021 result->type = CPP_HASH;
1022 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1023 buffer->cur += 2, result->type = CPP_PASTE;
1024 }
1025 else if (*buffer->cur == '>')
1026 {
1027 buffer->cur++;
1028 result->flags |= DIGRAPH;
1029 result->type = CPP_CLOSE_BRACE;
480709cc 1030 }
480709cc 1031 }
0d9f234d
NB
1032 break;
1033
cbcff6df 1034 case '.':
480709cc 1035 result->type = CPP_DOT;
6f572ac2 1036 if (ISDIGIT (*buffer->cur))
480709cc
NB
1037 {
1038 result->type = CPP_NUMBER;
bced6edf 1039 lex_number (pfile, &result->val.str);
480709cc 1040 }
6f572ac2
NB
1041 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1042 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1043 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1044 buffer->cur++, result->type = CPP_DOT_STAR;
0d9f234d 1045 break;
45b966db 1046
0d9f234d 1047 case '+':
6f572ac2
NB
1048 result->type = CPP_PLUS;
1049 if (*buffer->cur == '+')
1050 buffer->cur++, result->type = CPP_PLUS_PLUS;
1051 else if (*buffer->cur == '=')
1052 buffer->cur++, result->type = CPP_PLUS_EQ;
0d9f234d 1053 break;
04e3ec78 1054
0d9f234d 1055 case '-':
6f572ac2
NB
1056 result->type = CPP_MINUS;
1057 if (*buffer->cur == '>')
0d9f234d 1058 {
6f572ac2 1059 buffer->cur++;
480709cc 1060 result->type = CPP_DEREF;
6f572ac2
NB
1061 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1062 buffer->cur++, result->type = CPP_DEREF_STAR;
480709cc 1063 }
6f572ac2
NB
1064 else if (*buffer->cur == '-')
1065 buffer->cur++, result->type = CPP_MINUS_MINUS;
1066 else if (*buffer->cur == '=')
1067 buffer->cur++, result->type = CPP_MINUS_EQ;
0d9f234d 1068 break;
45b966db 1069
0d9f234d 1070 case '&':
6f572ac2
NB
1071 result->type = CPP_AND;
1072 if (*buffer->cur == '&')
1073 buffer->cur++, result->type = CPP_AND_AND;
1074 else if (*buffer->cur == '=')
1075 buffer->cur++, result->type = CPP_AND_EQ;
0d9f234d 1076 break;
df383483 1077
0d9f234d 1078 case '|':
6f572ac2
NB
1079 result->type = CPP_OR;
1080 if (*buffer->cur == '|')
1081 buffer->cur++, result->type = CPP_OR_OR;
1082 else if (*buffer->cur == '=')
1083 buffer->cur++, result->type = CPP_OR_EQ;
0d9f234d 1084 break;
45b966db 1085
0d9f234d 1086 case ':':
6f572ac2
NB
1087 result->type = CPP_COLON;
1088 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1089 buffer->cur++, result->type = CPP_SCOPE;
1090 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
0d9f234d 1091 {
6f572ac2 1092 buffer->cur++;
0d9f234d 1093 result->flags |= DIGRAPH;
480709cc
NB
1094 result->type = CPP_CLOSE_SQUARE;
1095 }
0d9f234d 1096 break;
45b966db 1097
480709cc
NB
1098 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1099 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1100 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1101 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1102 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1103
26aea073 1104 case '?': result->type = CPP_QUERY; break;
0d9f234d
NB
1105 case '~': result->type = CPP_COMPL; break;
1106 case ',': result->type = CPP_COMMA; break;
1107 case '(': result->type = CPP_OPEN_PAREN; break;
1108 case ')': result->type = CPP_CLOSE_PAREN; break;
1109 case '[': result->type = CPP_OPEN_SQUARE; break;
1110 case ']': result->type = CPP_CLOSE_SQUARE; break;
1111 case '{': result->type = CPP_OPEN_BRACE; break;
1112 case '}': result->type = CPP_CLOSE_BRACE; break;
1113 case ';': result->type = CPP_SEMICOLON; break;
1114
40f03658 1115 /* @ is a punctuator in Objective-C. */
cc937581 1116 case '@': result->type = CPP_ATSIGN; break;
0d9f234d 1117
0abc6a6a 1118 case '$':
1613e52b
NB
1119 case '\\':
1120 {
1121 const uchar *base = --buffer->cur;
0abc6a6a 1122
1613e52b
NB
1123 if (forms_identifier_p (pfile, true))
1124 {
1125 result->type = CPP_NAME;
1126 result->val.node = lex_identifier (pfile, base);
1127 break;
1128 }
1129 buffer->cur++;
1067694a 1130 }
1613e52b 1131
1067694a 1132 default:
6338b358
NB
1133 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1134 break;
0d9f234d 1135 }
bdcbe496
NB
1136
1137 return result;
0d9f234d
NB
1138}
1139
59325650
NB
1140/* An upper bound on the number of bytes needed to spell TOKEN.
1141 Does not include preceding whitespace. */
93c80368 1142unsigned int
6cf87ca4 1143cpp_token_len (const cpp_token *token)
0d9f234d 1144{
93c80368 1145 unsigned int len;
6d2c2047 1146
93c80368 1147 switch (TOKEN_SPELL (token))
041c3194 1148 {
59325650 1149 default: len = 4; break;
6338b358 1150 case SPELL_LITERAL: len = token->val.str.len; break;
a28c5035 1151 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
041c3194 1152 }
59325650
NB
1153
1154 return len;
6d2c2047
ZW
1155}
1156
041c3194 1157/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885 1158 already contain the enough space to hold the token's spelling.
6cf87ca4
ZW
1159 Returns a pointer to the character after the last character written.
1160 FIXME: Would be nice if we didn't need the PFILE argument. */
93c80368 1161unsigned char *
6cf87ca4
ZW
1162cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1163 unsigned char *buffer)
041c3194 1164{
96be6998 1165 switch (TOKEN_SPELL (token))
041c3194
ZW
1166 {
1167 case SPELL_OPERATOR:
1168 {
1169 const unsigned char *spelling;
1170 unsigned char c;
d6d5f795 1171
041c3194 1172 if (token->flags & DIGRAPH)
37b8524c
JDA
1173 spelling
1174 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
92936ecf
ZW
1175 else if (token->flags & NAMED_OP)
1176 goto spell_ident;
041c3194 1177 else
96be6998 1178 spelling = TOKEN_NAME (token);
df383483 1179
041c3194
ZW
1180 while ((c = *spelling++) != '\0')
1181 *buffer++ = c;
1182 }
1183 break;
d6d5f795 1184
47ad4138 1185 spell_ident:
041c3194 1186 case SPELL_IDENT:
a28c5035
NB
1187 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1188 buffer += NODE_LEN (token->val.node);
041c3194 1189 break;
d6d5f795 1190
6338b358 1191 case SPELL_LITERAL:
47ad4138
ZW
1192 memcpy (buffer, token->val.str.text, token->val.str.len);
1193 buffer += token->val.str.len;
1194 break;
1195
041c3194 1196 case SPELL_NONE:
0527bc4e
JDA
1197 cpp_error (pfile, CPP_DL_ICE,
1198 "unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1199 break;
1200 }
d6d5f795 1201
041c3194
ZW
1202 return buffer;
1203}
d6d5f795 1204
5d8ebbd8
NB
1205/* Returns TOKEN spelt as a null-terminated string. The string is
1206 freed when the reader is destroyed. Useful for diagnostics. */
93c80368 1207unsigned char *
6cf87ca4 1208cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
59325650
NB
1209{
1210 unsigned int len = cpp_token_len (token) + 1;
ece54d54 1211 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
c5a04734 1212
93c80368
NB
1213 end = cpp_spell_token (pfile, token, start);
1214 end[0] = '\0';
c5a04734 1215
93c80368
NB
1216 return start;
1217}
c5a04734 1218
5d8ebbd8
NB
1219/* Used by C front ends, which really should move to using
1220 cpp_token_as_text. */
93c80368 1221const char *
6cf87ca4 1222cpp_type2name (enum cpp_ttype type)
93c80368
NB
1223{
1224 return (const char *) token_spellings[type].name;
1225}
c5a04734 1226
4ed5bcfb
NB
1227/* Writes the spelling of token to FP, without any preceding space.
1228 Separated from cpp_spell_token for efficiency - to avoid stdio
1229 double-buffering. */
93c80368 1230void
6cf87ca4 1231cpp_output_token (const cpp_token *token, FILE *fp)
93c80368 1232{
93c80368 1233 switch (TOKEN_SPELL (token))
c5a04734 1234 {
93c80368
NB
1235 case SPELL_OPERATOR:
1236 {
1237 const unsigned char *spelling;
3b681e9d 1238 int c;
c5a04734 1239
93c80368 1240 if (token->flags & DIGRAPH)
37b8524c
JDA
1241 spelling
1242 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
93c80368
NB
1243 else if (token->flags & NAMED_OP)
1244 goto spell_ident;
1245 else
1246 spelling = TOKEN_NAME (token);
041c3194 1247
3b681e9d
ZW
1248 c = *spelling;
1249 do
1250 putc (c, fp);
1251 while ((c = *++spelling) != '\0');
93c80368
NB
1252 }
1253 break;
041c3194 1254
93c80368
NB
1255 spell_ident:
1256 case SPELL_IDENT:
3b681e9d 1257 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
93c80368 1258 break;
041c3194 1259
6338b358 1260 case SPELL_LITERAL:
47ad4138
ZW
1261 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1262 break;
1263
93c80368
NB
1264 case SPELL_NONE:
1265 /* An error, most probably. */
1266 break;
041c3194 1267 }
c5a04734
ZW
1268}
1269
93c80368
NB
1270/* Compare two tokens. */
1271int
6cf87ca4 1272_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
c5a04734 1273{
93c80368
NB
1274 if (a->type == b->type && a->flags == b->flags)
1275 switch (TOKEN_SPELL (a))
1276 {
1277 default: /* Keep compiler happy. */
1278 case SPELL_OPERATOR:
1279 return 1;
93c80368 1280 case SPELL_NONE:
56051c0a 1281 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
93c80368
NB
1282 case SPELL_IDENT:
1283 return a->val.node == b->val.node;
6338b358 1284 case SPELL_LITERAL:
93c80368
NB
1285 return (a->val.str.len == b->val.str.len
1286 && !memcmp (a->val.str.text, b->val.str.text,
1287 a->val.str.len));
1288 }
c5a04734 1289
041c3194
ZW
1290 return 0;
1291}
1292
93c80368
NB
1293/* Returns nonzero if a space should be inserted to avoid an
1294 accidental token paste for output. For simplicity, it is
1295 conservative, and occasionally advises a space where one is not
1296 needed, e.g. "." and ".2". */
93c80368 1297int
6cf87ca4
ZW
1298cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1299 const cpp_token *token2)
c5a04734 1300{
93c80368
NB
1301 enum cpp_ttype a = token1->type, b = token2->type;
1302 cppchar_t c;
c5a04734 1303
93c80368
NB
1304 if (token1->flags & NAMED_OP)
1305 a = CPP_NAME;
1306 if (token2->flags & NAMED_OP)
1307 b = CPP_NAME;
c5a04734 1308
93c80368
NB
1309 c = EOF;
1310 if (token2->flags & DIGRAPH)
37b8524c 1311 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
1312 else if (token_spellings[b].category == SPELL_OPERATOR)
1313 c = token_spellings[b].name[0];
c5a04734 1314
93c80368 1315 /* Quickly get everything that can paste with an '='. */
37b8524c 1316 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 1317 return 1;
c5a04734 1318
93c80368 1319 switch (a)
c5a04734 1320 {
93c80368
NB
1321 case CPP_GREATER: return c == '>' || c == '?';
1322 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1323 case CPP_PLUS: return c == '+';
1324 case CPP_MINUS: return c == '-' || c == '>';
1325 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1326 case CPP_MOD: return c == ':' || c == '>';
1327 case CPP_AND: return c == '&';
1328 case CPP_OR: return c == '|';
1329 case CPP_COLON: return c == ':' || c == '>';
1330 case CPP_DEREF: return c == '*';
26ec42ee 1331 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
1332 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1333 case CPP_NAME: return ((b == CPP_NUMBER
1334 && name_p (pfile, &token2->val.str))
1335 || b == CPP_NAME
1336 || b == CPP_CHAR || b == CPP_STRING); /* L */
1337 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1338 || c == '.' || c == '+' || c == '-');
1613e52b 1339 /* UCNs */
1067694a
NB
1340 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1341 && b == CPP_NAME)
1613e52b 1342 || (CPP_OPTION (pfile, objc)
1067694a 1343 && token1->val.str.text[0] == '@'
1613e52b 1344 && (b == CPP_NAME || b == CPP_STRING)));
93c80368 1345 default: break;
c5a04734 1346 }
c5a04734 1347
417f3e3a 1348 return 0;
c5a04734
ZW
1349}
1350
93c80368 1351/* Output all the remaining tokens on the current line, and a newline
4ed5bcfb
NB
1352 character, to FP. Leading whitespace is removed. If there are
1353 macros, special token padding is not performed. */
c5a04734 1354void
6cf87ca4 1355cpp_output_line (cpp_reader *pfile, FILE *fp)
c5a04734 1356{
4ed5bcfb 1357 const cpp_token *token;
96be6998 1358
4ed5bcfb
NB
1359 token = cpp_get_token (pfile);
1360 while (token->type != CPP_EOF)
96be6998 1361 {
4ed5bcfb
NB
1362 cpp_output_token (token, fp);
1363 token = cpp_get_token (pfile);
1364 if (token->flags & PREV_WHITE)
1365 putc (' ', fp);
96be6998
ZW
1366 }
1367
93c80368 1368 putc ('\n', fp);
041c3194 1369}
c5a04734 1370
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   growing BUFF: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro argument is parenthesized so that an expression argument
   cannot rebind against the surrounding arithmetic.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1385
c9e7a609
NB
1386/* Create a new allocation buffer. Place the control block at the end
1387 of the buffer, so that buffer overflows will cause immediate chaos. */
b8af0ca5 1388static _cpp_buff *
6cf87ca4 1389new_buff (size_t len)
b8af0ca5
NB
1390{
1391 _cpp_buff *result;
ece54d54 1392 unsigned char *base;
b8af0ca5 1393
1e013d2e
NB
1394 if (len < MIN_BUFF_SIZE)
1395 len = MIN_BUFF_SIZE;
c70f6ed3 1396 len = CPP_ALIGN (len);
b8af0ca5
NB
1397
1398 base = xmalloc (len + sizeof (_cpp_buff));
1399 result = (_cpp_buff *) (base + len);
1400 result->base = base;
1401 result->cur = base;
1402 result->limit = base + len;
1403 result->next = NULL;
1404 return result;
1405}
1406
1407/* Place a chain of unwanted allocation buffers on the free list. */
1408void
6cf87ca4 1409_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
b8af0ca5
NB
1410{
1411 _cpp_buff *end = buff;
1412
1413 while (end->next)
1414 end = end->next;
1415 end->next = pfile->free_buffs;
1416 pfile->free_buffs = buff;
1417}
1418
1419/* Return a free buffer of size at least MIN_SIZE. */
1420_cpp_buff *
6cf87ca4 1421_cpp_get_buff (cpp_reader *pfile, size_t min_size)
b8af0ca5
NB
1422{
1423 _cpp_buff *result, **p;
1424
1425 for (p = &pfile->free_buffs;; p = &(*p)->next)
1426 {
6142088c 1427 size_t size;
1e013d2e
NB
1428
1429 if (*p == NULL)
b8af0ca5 1430 return new_buff (min_size);
1e013d2e
NB
1431 result = *p;
1432 size = result->limit - result->base;
1433 /* Return a buffer that's big enough, but don't waste one that's
1434 way too big. */
34f5271d 1435 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
b8af0ca5
NB
1436 break;
1437 }
1438
1439 *p = result->next;
1440 result->next = NULL;
1441 result->cur = result->base;
1442 return result;
1443}
1444
4fe9b91c 1445/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
1446 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1447 the excess bytes to the new buffer. Chains the new buffer after
1448 BUFF, and returns the new buffer. */
b8af0ca5 1449_cpp_buff *
6cf87ca4 1450_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
b8af0ca5 1451{
6142088c 1452 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
8c3b2693 1453 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
b8af0ca5 1454
8c3b2693
NB
1455 buff->next = new_buff;
1456 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1457 return new_buff;
1458}
1459
4fe9b91c 1460/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
1461 remaining bytes of the buffer pointed to by BUFF, and at least
1462 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1463 Chains the new buffer before the buffer pointed to by BUFF, and
1464 updates the pointer to point to the new buffer. */
1465void
6cf87ca4 1466_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
8c3b2693
NB
1467{
1468 _cpp_buff *new_buff, *old_buff = *pbuff;
1469 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1470
1471 new_buff = _cpp_get_buff (pfile, size);
1472 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1473 new_buff->next = old_buff;
1474 *pbuff = new_buff;
b8af0ca5
NB
1475}
1476
1477/* Free a chain of buffers starting at BUFF. */
1478void
5671bf27 1479_cpp_free_buff (_cpp_buff *buff)
b8af0ca5
NB
1480{
1481 _cpp_buff *next;
1482
1483 for (; buff; buff = next)
1484 {
1485 next = buff->next;
1486 free (buff->base);
1487 }
1488}
417f3e3a 1489
ece54d54
NB
1490/* Allocate permanent, unaligned storage of length LEN. */
1491unsigned char *
6cf87ca4 1492_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
ece54d54
NB
1493{
1494 _cpp_buff *buff = pfile->u_buff;
1495 unsigned char *result = buff->cur;
1496
1497 if (len > (size_t) (buff->limit - result))
1498 {
1499 buff = _cpp_get_buff (pfile, len);
1500 buff->next = pfile->u_buff;
1501 pfile->u_buff = buff;
1502 result = buff->cur;
1503 }
1504
1505 buff->cur = result + len;
1506 return result;
1507}
1508
87062813
NB
1509/* Allocate permanent, unaligned storage of length LEN from a_buff.
1510 That buffer is used for growing allocations when saving macro
1511 replacement lists in a #define, and when parsing an answer to an
1512 assertion in #assert, #unassert or #if (and therefore possibly
1513 whilst expanding macros). It therefore must not be used by any
1514 code that they might call: specifically the lexer and the guts of
1515 the macro expander.
1516
1517 All existing other uses clearly fit this restriction: storing
1518 registered pragmas during initialization. */
93c80368 1519unsigned char *
6cf87ca4 1520_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
3fef5b2b 1521{
8c3b2693
NB
1522 _cpp_buff *buff = pfile->a_buff;
1523 unsigned char *result = buff->cur;
3fef5b2b 1524
8c3b2693 1525 if (len > (size_t) (buff->limit - result))
3fef5b2b 1526 {
8c3b2693
NB
1527 buff = _cpp_get_buff (pfile, len);
1528 buff->next = pfile->a_buff;
1529 pfile->a_buff = buff;
1530 result = buff->cur;
3fef5b2b 1531 }
041c3194 1532
8c3b2693 1533 buff->cur = result + len;
93c80368 1534 return result;
041c3194 1535}
This page took 1.105001 seconds and 5 git commands to generate.