]> gcc.gnu.org Git - gcc.git/blame - libcpp/lex.c
v850.c (v850_use_dfa_pipeline_interface): New.
[gcc.git] / libcpp / lex.c
CommitLineData
/* CPP Library - lexical analysis.
   Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
   Contributed by Per Bothner, 1994-95.
   Based on CCCP program by Paul Rubin, June 1986
   Adapted to ANSI C, Richard Stallman, Jan 1987
   Broken out to separate file, Zack Weinberg, Mar 2000

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
21
22#include "config.h"
23#include "system.h"
45b966db 24#include "cpplib.h"
4f4e53dd 25#include "internal.h"
45b966db 26
/* Spelling categories.  Each token type is assigned one of these in
   the token_spellings table.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
34
93c80368 35struct token_spelling
f9a0e96c 36{
93c80368
NB
37 enum spell_type category;
38 const unsigned char *name;
f9a0e96c
ZW
39};
40
8206c799
ZW
41static const unsigned char *const digraph_spellings[] =
42{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
93c80368
NB
43
44#define OP(e, s) { SPELL_OPERATOR, U s },
9a238586 45#define TK(e, s) { s, U #e },
8206c799 46static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
93c80368
NB
47#undef OP
48#undef TK
49
50#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
f2d5f0cc 52
6cf87ca4
ZW
53static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54static int skip_line_comment (cpp_reader *);
55static void skip_whitespace (cpp_reader *, cppchar_t);
56static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
57static void lex_number (cpp_reader *, cpp_string *);
58static bool forms_identifier_p (cpp_reader *, int);
59static void lex_string (cpp_reader *, cpp_token *, const uchar *);
60static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
61static void create_literal (cpp_reader *, cpp_token *, const uchar *,
62 unsigned int, enum cpp_ttype);
63static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
64static int name_p (cpp_reader *, const cpp_string *);
6cf87ca4
ZW
65static tokenrun *next_tokenrun (tokenrun *);
66
6cf87ca4 67static _cpp_buff *new_buff (size_t);
15dad1d9 68
9d10c9a9 69
041c3194 70/* Utility routine:
9e62c811 71
bfb9dc7f
ZW
72 Compares, the token TOKEN to the NUL-terminated string STRING.
73 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
041c3194 74int
6cf87ca4 75cpp_ideq (const cpp_token *token, const char *string)
041c3194 76{
bfb9dc7f 77 if (token->type != CPP_NAME)
041c3194 78 return 0;
bfb9dc7f 79
562a5c27 80 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
15dad1d9 81}
1368ee70 82
26aea073
NB
83/* Record a note TYPE at byte POS into the current cleaned logical
84 line. */
87062813 85static void
6cf87ca4 86add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
0d9f234d 87{
26aea073
NB
88 if (buffer->notes_used == buffer->notes_cap)
89 {
90 buffer->notes_cap = buffer->notes_cap * 2 + 200;
703ad42b
KG
91 buffer->notes = xrealloc (buffer->notes,
92 buffer->notes_cap * sizeof (_cpp_line_note));
26aea073 93 }
0d9f234d 94
26aea073
NB
95 buffer->notes[buffer->notes_used].pos = pos;
96 buffer->notes[buffer->notes_used].type = type;
97 buffer->notes_used++;
0d9f234d
NB
98}
99
26aea073
NB
100/* Returns with a logical line that contains no escaped newlines or
101 trigraphs. This is a time-critical inner loop. */
102void
6cf87ca4 103_cpp_clean_line (cpp_reader *pfile)
45b966db 104{
26aea073
NB
105 cpp_buffer *buffer;
106 const uchar *s;
107 uchar c, *d, *p;
87062813 108
26aea073
NB
109 buffer = pfile->buffer;
110 buffer->cur_note = buffer->notes_used = 0;
111 buffer->cur = buffer->line_base = buffer->next_line;
112 buffer->need_line = false;
113 s = buffer->next_line - 1;
87062813 114
26aea073 115 if (!buffer->from_stage3)
45b966db 116 {
d08dcf87
ZW
117 /* Short circuit for the common case of an un-escaped line with
118 no trigraphs. The primary win here is by not writing any
119 data back to memory until we have to. */
120 for (;;)
121 {
122 c = *++s;
123 if (c == '\n' || c == '\r')
124 {
125 d = (uchar *) s;
126
127 if (s == buffer->rlimit)
128 goto done;
129
130 /* DOS line ending? */
131 if (c == '\r' && s[1] == '\n')
132 s++;
133
134 if (s == buffer->rlimit)
135 goto done;
136
137 /* check for escaped newline */
138 p = d;
139 while (p != buffer->next_line && is_nvspace (p[-1]))
140 p--;
141 if (p == buffer->next_line || p[-1] != '\\')
142 goto done;
143
144 /* Have an escaped newline; process it and proceed to
145 the slow path. */
146 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
147 d = p - 2;
148 buffer->next_line = p - 1;
149 break;
150 }
151 if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
152 {
153 /* Have a trigraph. We may or may not have to convert
154 it. Add a line note regardless, for -Wtrigraphs. */
155 add_line_note (buffer, s, s[2]);
156 if (CPP_OPTION (pfile, trigraphs))
157 {
158 /* We do, and that means we have to switch to the
159 slow path. */
160 d = (uchar *) s;
161 *d = _cpp_trigraph_map[s[2]];
162 s += 2;
163 break;
164 }
165 }
166 }
167
26aea073
NB
168
169 for (;;)
4a5b68a2 170 {
26aea073
NB
171 c = *++s;
172 *++d = c;
173
174 if (c == '\n' || c == '\r')
175 {
176 /* Handle DOS line endings. */
177 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
178 s++;
179 if (s == buffer->rlimit)
180 break;
181
182 /* Escaped? */
183 p = d;
184 while (p != buffer->next_line && is_nvspace (p[-1]))
185 p--;
186 if (p == buffer->next_line || p[-1] != '\\')
187 break;
188
41c32c98 189 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
26aea073
NB
190 d = p - 2;
191 buffer->next_line = p - 1;
192 }
193 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
194 {
195 /* Add a note regardless, for the benefit of -Wtrigraphs. */
41c32c98 196 add_line_note (buffer, d, s[2]);
26aea073
NB
197 if (CPP_OPTION (pfile, trigraphs))
198 {
199 *d = _cpp_trigraph_map[s[2]];
200 s += 2;
201 }
202 }
4a5b68a2 203 }
45b966db 204 }
26aea073
NB
205 else
206 {
207 do
208 s++;
209 while (*s != '\n' && *s != '\r');
210 d = (uchar *) s;
211
212 /* Handle DOS line endings. */
213 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
214 s++;
215 }
0d9f234d 216
d08dcf87 217 done:
26aea073 218 *d = '\n';
41c32c98
NB
219 /* A sentinel note that should never be processed. */
220 add_line_note (buffer, d + 1, '\n');
26aea073 221 buffer->next_line = s + 1;
45b966db
ZW
222}
223
a8eb6044
NB
224/* Return true if the trigraph indicated by NOTE should be warned
225 about in a comment. */
226static bool
6cf87ca4 227warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
a8eb6044
NB
228{
229 const uchar *p;
230
231 /* Within comments we don't warn about trigraphs, unless the
232 trigraph forms an escaped newline, as that may change
6356f892 233 behavior. */
a8eb6044
NB
234 if (note->type != '/')
235 return false;
236
237 /* If -trigraphs, then this was an escaped newline iff the next note
238 is coincident. */
239 if (CPP_OPTION (pfile, trigraphs))
240 return note[1].pos == note->pos;
241
242 /* Otherwise, see if this forms an escaped newline. */
243 p = note->pos + 3;
244 while (is_nvspace (*p))
245 p++;
246
247 /* There might have been escaped newlines between the trigraph and the
248 newline we found. Hence the position test. */
249 return (*p == '\n' && p < note[1].pos);
250}
251
26aea073
NB
252/* Process the notes created by add_line_note as far as the current
253 location. */
254void
6cf87ca4 255_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
45b966db 256{
29401c30
NB
257 cpp_buffer *buffer = pfile->buffer;
258
26aea073 259 for (;;)
041c3194 260 {
26aea073
NB
261 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
262 unsigned int col;
a5c3cccd 263
26aea073
NB
264 if (note->pos > buffer->cur)
265 break;
a5c3cccd 266
26aea073
NB
267 buffer->cur_note++;
268 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
4d6baafa 269
41c32c98 270 if (note->type == '\\' || note->type == ' ')
26aea073 271 {
41c32c98 272 if (note->type == ' ' && !in_comment)
500bee0a 273 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
26aea073 274 "backslash and newline separated by space");
41c32c98 275
26aea073 276 if (buffer->next_line > buffer->rlimit)
87062813 277 {
500bee0a 278 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
26aea073
NB
279 "backslash-newline at end of file");
280 /* Prevent "no newline at end of file" warning. */
281 buffer->next_line = buffer->rlimit;
87062813 282 }
26aea073
NB
283
284 buffer->line_base = note->pos;
12f9df4e 285 CPP_INCREMENT_LINE (pfile, 0);
0d9f234d 286 }
41c32c98
NB
287 else if (_cpp_trigraph_map[note->type])
288 {
a8eb6044
NB
289 if (CPP_OPTION (pfile, warn_trigraphs)
290 && (!in_comment || warn_in_comment (pfile, note)))
41c32c98
NB
291 {
292 if (CPP_OPTION (pfile, trigraphs))
500bee0a 293 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
41c32c98
NB
294 "trigraph ??%c converted to %c",
295 note->type,
296 (int) _cpp_trigraph_map[note->type]);
297 else
905bd7b5
GK
298 {
299 cpp_error_with_line
500bee0a 300 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
905bd7b5
GK
301 "trigraph ??%c ignored, use -trigraphs to enable",
302 note->type);
303 }
41c32c98
NB
304 }
305 }
306 else
307 abort ();
041c3194 308 }
45b966db
ZW
309}
310
0d9f234d
NB
311/* Skip a C-style block comment. We find the end of the comment by
312 seeing if an asterisk is before every '/' we encounter. Returns
6f572ac2
NB
313 nonzero if comment terminated by EOF, zero otherwise.
314
315 Buffer->cur points to the initial asterisk of the comment. */
26aea073 316bool
6cf87ca4 317_cpp_skip_block_comment (cpp_reader *pfile)
45b966db 318{
041c3194 319 cpp_buffer *buffer = pfile->buffer;
d08dcf87
ZW
320 const uchar *cur = buffer->cur;
321 uchar c;
0d9f234d 322
d08dcf87
ZW
323 cur++;
324 if (*cur == '/')
325 cur++;
0d9f234d 326
26aea073
NB
327 for (;;)
328 {
0d9f234d
NB
329 /* People like decorating comments with '*', so check for '/'
330 instead for efficiency. */
d08dcf87
ZW
331 c = *cur++;
332
041c3194 333 if (c == '/')
45b966db 334 {
d08dcf87 335 if (cur[-2] == '*')
0d9f234d 336 break;
041c3194 337
0d9f234d 338 /* Warn about potential nested comments, but not if the '/'
a1f300c0 339 comes immediately before the true comment delimiter.
041c3194 340 Don't bother to get it right across escaped newlines. */
0d9f234d 341 if (CPP_OPTION (pfile, warn_comments)
d08dcf87
ZW
342 && cur[0] == '*' && cur[1] != '/')
343 {
344 buffer->cur = cur;
0527bc4e 345 cpp_error_with_line (pfile, CPP_DL_WARNING,
500bee0a 346 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
d08dcf87
ZW
347 "\"/*\" within comment");
348 }
45b966db 349 }
26aea073
NB
350 else if (c == '\n')
351 {
12f9df4e 352 unsigned int cols;
d08dcf87 353 buffer->cur = cur - 1;
26aea073
NB
354 _cpp_process_line_notes (pfile, true);
355 if (buffer->next_line >= buffer->rlimit)
356 return true;
357 _cpp_clean_line (pfile);
12f9df4e
PB
358
359 cols = buffer->next_line - buffer->line_base;
360 CPP_INCREMENT_LINE (pfile, cols);
361
d08dcf87 362 cur = buffer->cur;
26aea073 363 }
45b966db 364 }
041c3194 365
d08dcf87 366 buffer->cur = cur;
a8eb6044 367 _cpp_process_line_notes (pfile, true);
26aea073 368 return false;
45b966db
ZW
369}
370
480709cc 371/* Skip a C++ line comment, leaving buffer->cur pointing to the
da7d8304 372 terminating newline. Handles escaped newlines. Returns nonzero
480709cc 373 if a multiline comment. */
041c3194 374static int
6cf87ca4 375skip_line_comment (cpp_reader *pfile)
45b966db 376{
cbcff6df 377 cpp_buffer *buffer = pfile->buffer;
500bee0a 378 unsigned int orig_line = pfile->line_table->highest_line;
041c3194 379
26aea073
NB
380 while (*buffer->cur != '\n')
381 buffer->cur++;
480709cc 382
26aea073 383 _cpp_process_line_notes (pfile, true);
500bee0a 384 return orig_line != pfile->line_table->highest_line;
041c3194 385}
45b966db 386
26aea073 387/* Skips whitespace, saving the next non-whitespace character. */
52fadca8 388static void
6cf87ca4 389skip_whitespace (cpp_reader *pfile, cppchar_t c)
041c3194
ZW
390{
391 cpp_buffer *buffer = pfile->buffer;
f7d151fb 392 bool saw_NUL = false;
45b966db 393
0d9f234d 394 do
041c3194 395 {
91fcd158 396 /* Horizontal space always OK. */
26aea073 397 if (c == ' ' || c == '\t')
0d9f234d 398 ;
0d9f234d 399 /* Just \f \v or \0 left. */
91fcd158 400 else if (c == '\0')
f7d151fb 401 saw_NUL = true;
93c80368 402 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
500bee0a 403 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
ebef4e8c
NB
404 CPP_BUF_COL (buffer),
405 "%s in preprocessing directive",
406 c == '\f' ? "form feed" : "vertical tab");
0d9f234d 407
0d9f234d 408 c = *buffer->cur++;
45b966db 409 }
ec5c56db 410 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
0d9f234d
NB
411 while (is_nvspace (c));
412
f7d151fb 413 if (saw_NUL)
0527bc4e 414 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
f7d151fb 415
480709cc 416 buffer->cur--;
041c3194 417}
45b966db 418
93c80368
NB
419/* See if the characters of a number token are valid in a name (no
420 '.', '+' or '-'). */
421static int
6cf87ca4 422name_p (cpp_reader *pfile, const cpp_string *string)
93c80368
NB
423{
424 unsigned int i;
425
426 for (i = 0; i < string->len; i++)
427 if (!is_idchar (string->text[i]))
428 return 0;
429
df383483 430 return 1;
93c80368
NB
431}
432
bced6edf 433/* Returns TRUE if the sequence starting at buffer->cur is invalid in
1613e52b 434 an identifier. FIRST is TRUE if this starts an identifier. */
bced6edf 435static bool
6cf87ca4 436forms_identifier_p (cpp_reader *pfile, int first)
bced6edf 437{
1613e52b
NB
438 cpp_buffer *buffer = pfile->buffer;
439
440 if (*buffer->cur == '$')
441 {
442 if (!CPP_OPTION (pfile, dollars_in_ident))
443 return false;
444
445 buffer->cur++;
78b8811a 446 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1613e52b 447 {
78b8811a 448 CPP_OPTION (pfile, warn_dollars) = 0;
0527bc4e 449 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1613e52b
NB
450 }
451
452 return true;
453 }
bced6edf 454
1613e52b
NB
455 /* Is this a syntactically valid UCN? */
456 if (0 && *buffer->cur == '\\'
457 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
bced6edf 458 {
1613e52b 459 buffer->cur += 2;
e6cc3a24 460 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
1613e52b
NB
461 return true;
462 buffer->cur -= 2;
bced6edf 463 }
bced6edf 464
1613e52b 465 return false;
bced6edf
NB
466}
467
468/* Lex an identifier starting at BUFFER->CUR - 1. */
0d9f234d 469static cpp_hashnode *
6cf87ca4 470lex_identifier (cpp_reader *pfile, const uchar *base)
45b966db 471{
93c80368 472 cpp_hashnode *result;
1613e52b 473 const uchar *cur;
2c3fcba6 474
bced6edf 475 do
10cf9bde 476 {
bced6edf
NB
477 cur = pfile->buffer->cur;
478
479 /* N.B. ISIDNUM does not include $. */
480 while (ISIDNUM (*cur))
481 cur++;
10cf9bde 482
10cf9bde 483 pfile->buffer->cur = cur;
2c3fcba6 484 }
1613e52b 485 while (forms_identifier_p (pfile, false));
bced6edf
NB
486
487 result = (cpp_hashnode *)
488 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
2c3fcba6 489
bced6edf 490 /* Rarely, identifiers require diagnostics when lexed. */
2c3fcba6
ZW
491 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
492 && !pfile->state.skipping, 0))
493 {
494 /* It is allowed to poison the same identifier twice. */
495 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
0527bc4e 496 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
2c3fcba6
ZW
497 NODE_NAME (result));
498
499 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
500 replacement list of a variadic macro. */
501 if (result == pfile->spec_nodes.n__VA_ARGS__
502 && !pfile->state.va_args_ok)
0527bc4e 503 cpp_error (pfile, CPP_DL_PEDWARN,
6cf87ca4
ZW
504 "__VA_ARGS__ can only appear in the expansion"
505 " of a C99 variadic macro");
2c3fcba6
ZW
506 }
507
508 return result;
509}
510
bced6edf 511/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
45b966db 512static void
6cf87ca4 513lex_number (cpp_reader *pfile, cpp_string *number)
45b966db 514{
562a5c27 515 const uchar *cur;
bced6edf
NB
516 const uchar *base;
517 uchar *dest;
45b966db 518
bced6edf
NB
519 base = pfile->buffer->cur - 1;
520 do
041c3194 521 {
bced6edf 522 cur = pfile->buffer->cur;
0d9f234d 523
bced6edf
NB
524 /* N.B. ISIDNUM does not include $. */
525 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
526 cur++;
45b966db 527
10cf9bde 528 pfile->buffer->cur = cur;
45b966db 529 }
1613e52b 530 while (forms_identifier_p (pfile, false));
93c80368 531
bced6edf
NB
532 number->len = cur - base;
533 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
534 memcpy (dest, base, number->len);
535 dest[number->len] = '\0';
536 number->text = dest;
93c80368
NB
537}
538
6338b358
NB
539/* Create a token of type TYPE with a literal spelling. */
540static void
6cf87ca4
ZW
541create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
542 unsigned int len, enum cpp_ttype type)
6338b358
NB
543{
544 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
545
546 memcpy (dest, base, len);
547 dest[len] = '\0';
548 token->type = type;
549 token->val.str.len = len;
550 token->val.str.text = dest;
551}
552
bced6edf 553/* Lexes a string, character constant, or angle-bracketed header file
6338b358
NB
554 name. The stored string contains the spelling, including opening
555 quote and leading any leading 'L'. It returns the type of the
556 literal, or CPP_OTHER if it was not properly terminated.
557
558 The spelling is NUL-terminated, but it is not guaranteed that this
559 is the first NUL since embedded NULs are preserved. */
041c3194 560static void
6cf87ca4 561lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
45b966db 562{
6338b358
NB
563 bool saw_NUL = false;
564 const uchar *cur;
bced6edf 565 cppchar_t terminator;
6338b358
NB
566 enum cpp_ttype type;
567
568 cur = base;
569 terminator = *cur++;
570 if (terminator == 'L')
571 terminator = *cur++;
572 if (terminator == '\"')
573 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
574 else if (terminator == '\'')
575 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
576 else
577 terminator = '>', type = CPP_HEADER_NAME;
93c80368 578
0d9f234d 579 for (;;)
45b966db 580 {
6338b358 581 cppchar_t c = *cur++;
7868b4a2 582
6f572ac2 583 /* In #include-style directives, terminators are not escapable. */
6338b358
NB
584 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
585 cur++;
586 else if (c == terminator)
bced6edf 587 break;
6338b358 588 else if (c == '\n')
0d9f234d 589 {
6338b358
NB
590 cur--;
591 type = CPP_OTHER;
592 break;
45b966db 593 }
6338b358
NB
594 else if (c == '\0')
595 saw_NUL = true;
45b966db
ZW
596 }
597
6338b358 598 if (saw_NUL && !pfile->state.skipping)
0527bc4e
JDA
599 cpp_error (pfile, CPP_DL_WARNING,
600 "null character(s) preserved in literal");
45b966db 601
6338b358
NB
602 pfile->buffer->cur = cur;
603 create_literal (pfile, token, base, cur - base, type);
0d9f234d 604}
041c3194 605
93c80368 606/* The stored comment includes the comment start and any terminator. */
9e62c811 607static void
6cf87ca4
ZW
608save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
609 cppchar_t type)
9e62c811 610{
041c3194 611 unsigned char *buffer;
477cdac7 612 unsigned int len, clen;
df383483 613
1c6d33ef 614 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
480709cc 615
3542203b
NB
616 /* C++ comments probably (not definitely) have moved past a new
617 line, which we don't want to save in the comment. */
480709cc 618 if (is_vspace (pfile->buffer->cur[-1]))
3542203b 619 len--;
477cdac7
JT
620
621 /* If we are currently in a directive, then we need to store all
622 C++ comments as C comments internally, and so we need to
623 allocate a little extra space in that case.
624
625 Note that the only time we encounter a directive here is
626 when we are saving comments in a "#define". */
627 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
628
629 buffer = _cpp_unaligned_alloc (pfile, clen);
df383483 630
041c3194 631 token->type = CPP_COMMENT;
477cdac7 632 token->val.str.len = clen;
0d9f234d 633 token->val.str.text = buffer;
45b966db 634
1c6d33ef
NB
635 buffer[0] = '/';
636 memcpy (buffer + 1, from, len - 1);
477cdac7 637
1eeeb6a4 638 /* Finish conversion to a C comment, if necessary. */
477cdac7
JT
639 if (pfile->state.in_directive && type == '/')
640 {
641 buffer[1] = '*';
642 buffer[clen - 2] = '*';
643 buffer[clen - 1] = '/';
644 }
0d9f234d 645}
45b966db 646
5fddcffc
NB
647/* Allocate COUNT tokens for RUN. */
648void
6cf87ca4 649_cpp_init_tokenrun (tokenrun *run, unsigned int count)
5fddcffc
NB
650{
651 run->base = xnewvec (cpp_token, count);
652 run->limit = run->base + count;
653 run->next = NULL;
654}
655
656/* Returns the next tokenrun, or creates one if there is none. */
657static tokenrun *
6cf87ca4 658next_tokenrun (tokenrun *run)
5fddcffc
NB
659{
660 if (run->next == NULL)
661 {
662 run->next = xnew (tokenrun);
bdcbe496 663 run->next->prev = run;
5fddcffc
NB
664 _cpp_init_tokenrun (run->next, 250);
665 }
666
667 return run->next;
668}
669
4ed5bcfb
NB
670/* Allocate a single token that is invalidated at the same time as the
671 rest of the tokens on the line. Has its line and col set to the
672 same as the last lexed token, so that diagnostics appear in the
673 right place. */
674cpp_token *
6cf87ca4 675_cpp_temp_token (cpp_reader *pfile)
4ed5bcfb
NB
676{
677 cpp_token *old, *result;
678
679 old = pfile->cur_token - 1;
680 if (pfile->cur_token == pfile->cur_run->limit)
681 {
682 pfile->cur_run = next_tokenrun (pfile->cur_run);
683 pfile->cur_token = pfile->cur_run->base;
684 }
685
686 result = pfile->cur_token++;
12f9df4e 687 result->src_loc = old->src_loc;
4ed5bcfb
NB
688 return result;
689}
690
14baae01
NB
691/* Lex a token into RESULT (external interface). Takes care of issues
692 like directive handling, token lookahead, multiple include
a1f300c0 693 optimization and skipping. */
345894b4 694const cpp_token *
6cf87ca4 695_cpp_lex_token (cpp_reader *pfile)
5fddcffc 696{
bdcbe496 697 cpp_token *result;
5fddcffc 698
bdcbe496 699 for (;;)
5fddcffc 700 {
bdcbe496 701 if (pfile->cur_token == pfile->cur_run->limit)
5fddcffc 702 {
bdcbe496
NB
703 pfile->cur_run = next_tokenrun (pfile->cur_run);
704 pfile->cur_token = pfile->cur_run->base;
5fddcffc
NB
705 }
706
bdcbe496 707 if (pfile->lookaheads)
14baae01
NB
708 {
709 pfile->lookaheads--;
710 result = pfile->cur_token++;
711 }
bdcbe496 712 else
14baae01 713 result = _cpp_lex_direct (pfile);
bdcbe496
NB
714
715 if (result->flags & BOL)
5fddcffc 716 {
bdcbe496
NB
717 /* Is this a directive. If _cpp_handle_directive returns
718 false, it is an assembler #. */
719 if (result->type == CPP_HASH
e808ec9c
NB
720 /* 6.10.3 p 11: Directives in a list of macro arguments
721 gives undefined behavior. This implementation
722 handles the directive as normal. */
723 && pfile->state.parsing_args != 1
bdcbe496
NB
724 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
725 continue;
97293897 726 if (pfile->cb.line_change && !pfile->state.skipping)
6cf87ca4 727 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
5fddcffc 728 }
5fddcffc 729
bdcbe496
NB
730 /* We don't skip tokens in directives. */
731 if (pfile->state.in_directive)
732 break;
5fddcffc 733
bdcbe496 734 /* Outside a directive, invalidate controlling macros. At file
14baae01 735 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
6356f892 736 get here and MI optimization works. */
5fddcffc 737 pfile->mi_valid = false;
bdcbe496
NB
738
739 if (!pfile->state.skipping || result->type == CPP_EOF)
740 break;
5fddcffc
NB
741 }
742
345894b4 743 return result;
5fddcffc
NB
744}
745
26aea073
NB
746/* Returns true if a fresh line has been loaded. */
747bool
6cf87ca4 748_cpp_get_fresh_line (cpp_reader *pfile)
004cb263 749{
22234f56
PB
750 int return_at_eof;
751
26aea073
NB
752 /* We can't get a new line until we leave the current directive. */
753 if (pfile->state.in_directive)
754 return false;
df383483 755
26aea073 756 for (;;)
1a76916c 757 {
26aea073 758 cpp_buffer *buffer = pfile->buffer;
1a76916c 759
26aea073
NB
760 if (!buffer->need_line)
761 return true;
762
763 if (buffer->next_line < buffer->rlimit)
004cb263 764 {
26aea073
NB
765 _cpp_clean_line (pfile);
766 return true;
767 }
004cb263 768
26aea073
NB
769 /* First, get out of parsing arguments state. */
770 if (pfile->state.parsing_args)
771 return false;
772
773 /* End of buffer. Non-empty files should end in a newline. */
774 if (buffer->buf != buffer->rlimit
775 && buffer->next_line > buffer->rlimit
776 && !buffer->from_stage3)
777 {
778 /* Only warn once. */
779 buffer->next_line = buffer->rlimit;
500bee0a 780 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
26aea073
NB
781 CPP_BUF_COLUMN (buffer, buffer->cur),
782 "no newline at end of file");
783 }
22234f56
PB
784
785 return_at_eof = buffer->return_at_eof;
26aea073 786 _cpp_pop_buffer (pfile);
22234f56 787 if (pfile->buffer == NULL || return_at_eof)
a506c55c 788 return false;
26aea073 789 }
004cb263
NB
790}
791
6f572ac2
NB
/* Helper for two-character operators.  If the next input character is
   CHAR, consume it and give RESULT the type THEN_TYPE; otherwise give
   it ELSE_TYPE.  Expects BUFFER and RESULT to be in scope at the point
   of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)	\
  do						\
    {						\
      if (*buffer->cur == CHAR)			\
	{					\
	  buffer->cur++;			\
	  result->type = THEN_TYPE;		\
	}					\
      else					\
	result->type = ELSE_TYPE;		\
    }						\
  while (0)
480709cc 800
14baae01
NB
801/* Lex a token into pfile->cur_token, which is also incremented, to
802 get diagnostics pointing to the correct location.
803
804 Does not handle issues such as token lookahead, multiple-include
f1ba665b 805 optimization, directives, skipping etc. This function is only
14baae01
NB
806 suitable for use by _cpp_lex_token, and in special cases like
807 lex_expansion_token which doesn't care for any of these issues.
808
809 When meeting a newline, returns CPP_EOF if parsing a directive,
810 otherwise returns to the start of the token buffer if permissible.
811 Returns the location of the lexed token. */
812cpp_token *
6cf87ca4 813_cpp_lex_direct (cpp_reader *pfile)
45b966db 814{
0d9f234d 815 cppchar_t c;
adb84b42 816 cpp_buffer *buffer;
0d9f234d 817 const unsigned char *comment_start;
14baae01 818 cpp_token *result = pfile->cur_token++;
9ec7291f 819
5fddcffc 820 fresh_line:
26aea073 821 result->flags = 0;
2be570f9 822 buffer = pfile->buffer;
a506c55c 823 if (buffer->need_line)
26aea073
NB
824 {
825 if (!_cpp_get_fresh_line (pfile))
826 {
827 result->type = CPP_EOF;
9ff7868d
NB
828 if (!pfile->state.in_directive)
829 {
830 /* Tell the compiler the line number of the EOF token. */
500bee0a 831 result->src_loc = pfile->line_table->highest_line;
9ff7868d
NB
832 result->flags = BOL;
833 }
26aea073
NB
834 return result;
835 }
836 if (!pfile->keep_tokens)
837 {
838 pfile->cur_run = &pfile->base_run;
839 result = pfile->base_run.base;
840 pfile->cur_token = result + 1;
841 }
842 result->flags = BOL;
843 if (pfile->state.parsing_args == 2)
844 result->flags |= PREV_WHITE;
845 }
a506c55c 846 buffer = pfile->buffer;
5fddcffc 847 update_tokens_line:
500bee0a 848 result->src_loc = pfile->line_table->highest_line;
041c3194 849
5fddcffc 850 skipped_white:
26aea073
NB
851 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
852 && !pfile->overlaid_buffer)
853 {
854 _cpp_process_line_notes (pfile, false);
500bee0a 855 result->src_loc = pfile->line_table->highest_line;
26aea073 856 }
480709cc 857 c = *buffer->cur++;
12f9df4e 858
500bee0a
PB
859 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
860 CPP_BUF_COLUMN (buffer, buffer->cur));
5fddcffc 861
0d9f234d 862 switch (c)
45b966db 863 {
4d6baafa
NB
864 case ' ': case '\t': case '\f': case '\v': case '\0':
865 result->flags |= PREV_WHITE;
26aea073
NB
866 skip_whitespace (pfile, c);
867 goto skipped_white;
0d9f234d 868
26aea073 869 case '\n':
12f9df4e
PB
870 if (buffer->cur < buffer->rlimit)
871 CPP_INCREMENT_LINE (pfile, 0);
26aea073
NB
872 buffer->need_line = true;
873 goto fresh_line;
46d07497 874
0d9f234d
NB
875 case '0': case '1': case '2': case '3': case '4':
876 case '5': case '6': case '7': case '8': case '9':
877 result->type = CPP_NUMBER;
bced6edf 878 lex_number (pfile, &result->val.str);
0d9f234d 879 break;
46d07497 880
0abc6a6a
NB
881 case 'L':
882 /* 'L' may introduce wide characters or strings. */
bced6edf
NB
883 if (*buffer->cur == '\'' || *buffer->cur == '"')
884 {
6338b358 885 lex_string (pfile, result, buffer->cur - 1);
bced6edf
NB
886 break;
887 }
df383483 888 /* Fall through. */
0abc6a6a 889
0d9f234d
NB
890 case '_':
891 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
892 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
893 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
894 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
895 case 'y': case 'z':
896 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
0abc6a6a 897 case 'G': case 'H': case 'I': case 'J': case 'K':
0d9f234d
NB
898 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
899 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
900 case 'Y': case 'Z':
901 result->type = CPP_NAME;
1613e52b 902 result->val.node = lex_identifier (pfile, buffer->cur - 1);
0d9f234d 903
0d9f234d 904 /* Convert named operators to their proper types. */
0abc6a6a 905 if (result->val.node->flags & NODE_OPERATOR)
0d9f234d
NB
906 {
907 result->flags |= NAMED_OP;
4977bab6 908 result->type = result->val.node->directive_index;
0d9f234d
NB
909 }
910 break;
911
912 case '\'':
913 case '"':
6338b358 914 lex_string (pfile, result, buffer->cur - 1);
0d9f234d 915 break;
041c3194 916
0d9f234d 917 case '/':
1c6d33ef
NB
918 /* A potential block or line comment. */
919 comment_start = buffer->cur;
6f572ac2
NB
920 c = *buffer->cur;
921
1c6d33ef
NB
922 if (c == '*')
923 {
26aea073 924 if (_cpp_skip_block_comment (pfile))
0527bc4e 925 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
0d9f234d 926 }
480709cc 927 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
12f9df4e 928 || cpp_in_system_header (pfile)))
0d9f234d 929 {
bdb05a7b
NB
930 /* Warn about comments only if pedantically GNUC89, and not
931 in system headers. */
932 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 933 && ! buffer->warned_cplusplus_comments)
041c3194 934 {
0527bc4e 935 cpp_error (pfile, CPP_DL_PEDWARN,
56508306 936 "C++ style comments are not allowed in ISO C90");
0527bc4e 937 cpp_error (pfile, CPP_DL_PEDWARN,
ebef4e8c 938 "(this will be reported only once per input file)");
1c6d33ef
NB
939 buffer->warned_cplusplus_comments = 1;
940 }
0d9f234d 941
01ef6563 942 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
0527bc4e 943 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1c6d33ef 944 }
480709cc
NB
945 else if (c == '=')
946 {
6f572ac2 947 buffer->cur++;
480709cc
NB
948 result->type = CPP_DIV_EQ;
949 break;
950 }
951 else
952 {
480709cc
NB
953 result->type = CPP_DIV;
954 break;
955 }
0d9f234d 956
1c6d33ef
NB
957 if (!pfile->state.save_comments)
958 {
959 result->flags |= PREV_WHITE;
5fddcffc 960 goto update_tokens_line;
0d9f234d 961 }
1c6d33ef
NB
962
963 /* Save the comment as a token in its own right. */
477cdac7 964 save_comment (pfile, result, comment_start, c);
bdcbe496 965 break;
0d9f234d
NB
966
967 case '<':
968 if (pfile->state.angled_headers)
969 {
6338b358 970 lex_string (pfile, result, buffer->cur - 1);
480709cc 971 break;
0d9f234d 972 }
45b966db 973
6f572ac2
NB
974 result->type = CPP_LESS;
975 if (*buffer->cur == '=')
976 buffer->cur++, result->type = CPP_LESS_EQ;
977 else if (*buffer->cur == '<')
0d9f234d 978 {
6f572ac2
NB
979 buffer->cur++;
980 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
0d9f234d 981 }
6f572ac2 982 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
0d9f234d 983 {
6f572ac2
NB
984 buffer->cur++;
985 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
0d9f234d 986 }
6f572ac2 987 else if (CPP_OPTION (pfile, digraphs))
480709cc 988 {
6f572ac2
NB
989 if (*buffer->cur == ':')
990 {
991 buffer->cur++;
992 result->flags |= DIGRAPH;
993 result->type = CPP_OPEN_SQUARE;
994 }
995 else if (*buffer->cur == '%')
996 {
997 buffer->cur++;
998 result->flags |= DIGRAPH;
999 result->type = CPP_OPEN_BRACE;
1000 }
480709cc 1001 }
0d9f234d
NB
1002 break;
1003
1004 case '>':
6f572ac2
NB
1005 result->type = CPP_GREATER;
1006 if (*buffer->cur == '=')
1007 buffer->cur++, result->type = CPP_GREATER_EQ;
1008 else if (*buffer->cur == '>')
0d9f234d 1009 {
6f572ac2
NB
1010 buffer->cur++;
1011 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1012 }
1013 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1014 {
1015 buffer->cur++;
1016 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
0d9f234d
NB
1017 }
1018 break;
1019
cbcff6df 1020 case '%':
6f572ac2
NB
1021 result->type = CPP_MOD;
1022 if (*buffer->cur == '=')
1023 buffer->cur++, result->type = CPP_MOD_EQ;
1024 else if (CPP_OPTION (pfile, digraphs))
480709cc 1025 {
6f572ac2 1026 if (*buffer->cur == ':')
480709cc 1027 {
6f572ac2
NB
1028 buffer->cur++;
1029 result->flags |= DIGRAPH;
1030 result->type = CPP_HASH;
1031 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1032 buffer->cur += 2, result->type = CPP_PASTE;
1033 }
1034 else if (*buffer->cur == '>')
1035 {
1036 buffer->cur++;
1037 result->flags |= DIGRAPH;
1038 result->type = CPP_CLOSE_BRACE;
480709cc 1039 }
480709cc 1040 }
0d9f234d
NB
1041 break;
1042
cbcff6df 1043 case '.':
480709cc 1044 result->type = CPP_DOT;
6f572ac2 1045 if (ISDIGIT (*buffer->cur))
480709cc
NB
1046 {
1047 result->type = CPP_NUMBER;
bced6edf 1048 lex_number (pfile, &result->val.str);
480709cc 1049 }
6f572ac2
NB
1050 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1051 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1052 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1053 buffer->cur++, result->type = CPP_DOT_STAR;
0d9f234d 1054 break;
45b966db 1055
0d9f234d 1056 case '+':
6f572ac2
NB
1057 result->type = CPP_PLUS;
1058 if (*buffer->cur == '+')
1059 buffer->cur++, result->type = CPP_PLUS_PLUS;
1060 else if (*buffer->cur == '=')
1061 buffer->cur++, result->type = CPP_PLUS_EQ;
0d9f234d 1062 break;
04e3ec78 1063
0d9f234d 1064 case '-':
6f572ac2
NB
1065 result->type = CPP_MINUS;
1066 if (*buffer->cur == '>')
0d9f234d 1067 {
6f572ac2 1068 buffer->cur++;
480709cc 1069 result->type = CPP_DEREF;
6f572ac2
NB
1070 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1071 buffer->cur++, result->type = CPP_DEREF_STAR;
480709cc 1072 }
6f572ac2
NB
1073 else if (*buffer->cur == '-')
1074 buffer->cur++, result->type = CPP_MINUS_MINUS;
1075 else if (*buffer->cur == '=')
1076 buffer->cur++, result->type = CPP_MINUS_EQ;
0d9f234d 1077 break;
45b966db 1078
0d9f234d 1079 case '&':
6f572ac2
NB
1080 result->type = CPP_AND;
1081 if (*buffer->cur == '&')
1082 buffer->cur++, result->type = CPP_AND_AND;
1083 else if (*buffer->cur == '=')
1084 buffer->cur++, result->type = CPP_AND_EQ;
0d9f234d 1085 break;
df383483 1086
0d9f234d 1087 case '|':
6f572ac2
NB
1088 result->type = CPP_OR;
1089 if (*buffer->cur == '|')
1090 buffer->cur++, result->type = CPP_OR_OR;
1091 else if (*buffer->cur == '=')
1092 buffer->cur++, result->type = CPP_OR_EQ;
0d9f234d 1093 break;
45b966db 1094
0d9f234d 1095 case ':':
6f572ac2
NB
1096 result->type = CPP_COLON;
1097 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1098 buffer->cur++, result->type = CPP_SCOPE;
1099 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
0d9f234d 1100 {
6f572ac2 1101 buffer->cur++;
0d9f234d 1102 result->flags |= DIGRAPH;
480709cc
NB
1103 result->type = CPP_CLOSE_SQUARE;
1104 }
0d9f234d 1105 break;
45b966db 1106
480709cc
NB
1107 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1108 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1109 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1110 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1111 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1112
26aea073 1113 case '?': result->type = CPP_QUERY; break;
0d9f234d
NB
1114 case '~': result->type = CPP_COMPL; break;
1115 case ',': result->type = CPP_COMMA; break;
1116 case '(': result->type = CPP_OPEN_PAREN; break;
1117 case ')': result->type = CPP_CLOSE_PAREN; break;
1118 case '[': result->type = CPP_OPEN_SQUARE; break;
1119 case ']': result->type = CPP_CLOSE_SQUARE; break;
1120 case '{': result->type = CPP_OPEN_BRACE; break;
1121 case '}': result->type = CPP_CLOSE_BRACE; break;
1122 case ';': result->type = CPP_SEMICOLON; break;
1123
40f03658 1124 /* @ is a punctuator in Objective-C. */
cc937581 1125 case '@': result->type = CPP_ATSIGN; break;
0d9f234d 1126
0abc6a6a 1127 case '$':
1613e52b
NB
1128 case '\\':
1129 {
1130 const uchar *base = --buffer->cur;
0abc6a6a 1131
1613e52b
NB
1132 if (forms_identifier_p (pfile, true))
1133 {
1134 result->type = CPP_NAME;
1135 result->val.node = lex_identifier (pfile, base);
1136 break;
1137 }
1138 buffer->cur++;
1067694a 1139 }
1613e52b 1140
1067694a 1141 default:
6338b358
NB
1142 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1143 break;
0d9f234d 1144 }
bdcbe496
NB
1145
1146 return result;
0d9f234d
NB
1147}
1148
59325650
NB
1149/* An upper bound on the number of bytes needed to spell TOKEN.
1150 Does not include preceding whitespace. */
93c80368 1151unsigned int
6cf87ca4 1152cpp_token_len (const cpp_token *token)
0d9f234d 1153{
93c80368 1154 unsigned int len;
6d2c2047 1155
93c80368 1156 switch (TOKEN_SPELL (token))
041c3194 1157 {
59325650 1158 default: len = 4; break;
6338b358 1159 case SPELL_LITERAL: len = token->val.str.len; break;
a28c5035 1160 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
041c3194 1161 }
59325650
NB
1162
1163 return len;
6d2c2047
ZW
1164}
1165
041c3194 1166/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885 1167 already contain the enough space to hold the token's spelling.
6cf87ca4
ZW
1168 Returns a pointer to the character after the last character written.
1169 FIXME: Would be nice if we didn't need the PFILE argument. */
93c80368 1170unsigned char *
6cf87ca4
ZW
1171cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1172 unsigned char *buffer)
041c3194 1173{
96be6998 1174 switch (TOKEN_SPELL (token))
041c3194
ZW
1175 {
1176 case SPELL_OPERATOR:
1177 {
1178 const unsigned char *spelling;
1179 unsigned char c;
d6d5f795 1180
041c3194 1181 if (token->flags & DIGRAPH)
37b8524c
JDA
1182 spelling
1183 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
92936ecf
ZW
1184 else if (token->flags & NAMED_OP)
1185 goto spell_ident;
041c3194 1186 else
96be6998 1187 spelling = TOKEN_NAME (token);
df383483 1188
041c3194
ZW
1189 while ((c = *spelling++) != '\0')
1190 *buffer++ = c;
1191 }
1192 break;
d6d5f795 1193
47ad4138 1194 spell_ident:
041c3194 1195 case SPELL_IDENT:
a28c5035
NB
1196 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1197 buffer += NODE_LEN (token->val.node);
041c3194 1198 break;
d6d5f795 1199
6338b358 1200 case SPELL_LITERAL:
47ad4138
ZW
1201 memcpy (buffer, token->val.str.text, token->val.str.len);
1202 buffer += token->val.str.len;
1203 break;
1204
041c3194 1205 case SPELL_NONE:
0527bc4e
JDA
1206 cpp_error (pfile, CPP_DL_ICE,
1207 "unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1208 break;
1209 }
d6d5f795 1210
041c3194
ZW
1211 return buffer;
1212}
d6d5f795 1213
5d8ebbd8
NB
1214/* Returns TOKEN spelt as a null-terminated string. The string is
1215 freed when the reader is destroyed. Useful for diagnostics. */
93c80368 1216unsigned char *
6cf87ca4 1217cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
59325650
NB
1218{
1219 unsigned int len = cpp_token_len (token) + 1;
ece54d54 1220 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
c5a04734 1221
93c80368
NB
1222 end = cpp_spell_token (pfile, token, start);
1223 end[0] = '\0';
c5a04734 1224
93c80368
NB
1225 return start;
1226}
c5a04734 1227
5d8ebbd8
NB
1228/* Used by C front ends, which really should move to using
1229 cpp_token_as_text. */
93c80368 1230const char *
6cf87ca4 1231cpp_type2name (enum cpp_ttype type)
93c80368
NB
1232{
1233 return (const char *) token_spellings[type].name;
1234}
c5a04734 1235
4ed5bcfb
NB
1236/* Writes the spelling of token to FP, without any preceding space.
1237 Separated from cpp_spell_token for efficiency - to avoid stdio
1238 double-buffering. */
93c80368 1239void
6cf87ca4 1240cpp_output_token (const cpp_token *token, FILE *fp)
93c80368 1241{
93c80368 1242 switch (TOKEN_SPELL (token))
c5a04734 1243 {
93c80368
NB
1244 case SPELL_OPERATOR:
1245 {
1246 const unsigned char *spelling;
3b681e9d 1247 int c;
c5a04734 1248
93c80368 1249 if (token->flags & DIGRAPH)
37b8524c
JDA
1250 spelling
1251 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
93c80368
NB
1252 else if (token->flags & NAMED_OP)
1253 goto spell_ident;
1254 else
1255 spelling = TOKEN_NAME (token);
041c3194 1256
3b681e9d
ZW
1257 c = *spelling;
1258 do
1259 putc (c, fp);
1260 while ((c = *++spelling) != '\0');
93c80368
NB
1261 }
1262 break;
041c3194 1263
93c80368
NB
1264 spell_ident:
1265 case SPELL_IDENT:
3b681e9d 1266 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
93c80368 1267 break;
041c3194 1268
6338b358 1269 case SPELL_LITERAL:
47ad4138
ZW
1270 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1271 break;
1272
93c80368
NB
1273 case SPELL_NONE:
1274 /* An error, most probably. */
1275 break;
041c3194 1276 }
c5a04734
ZW
1277}
1278
93c80368
NB
1279/* Compare two tokens. */
1280int
6cf87ca4 1281_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
c5a04734 1282{
93c80368
NB
1283 if (a->type == b->type && a->flags == b->flags)
1284 switch (TOKEN_SPELL (a))
1285 {
1286 default: /* Keep compiler happy. */
1287 case SPELL_OPERATOR:
1288 return 1;
93c80368 1289 case SPELL_NONE:
56051c0a 1290 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
93c80368
NB
1291 case SPELL_IDENT:
1292 return a->val.node == b->val.node;
6338b358 1293 case SPELL_LITERAL:
93c80368
NB
1294 return (a->val.str.len == b->val.str.len
1295 && !memcmp (a->val.str.text, b->val.str.text,
1296 a->val.str.len));
1297 }
c5a04734 1298
041c3194
ZW
1299 return 0;
1300}
1301
93c80368
NB
1302/* Returns nonzero if a space should be inserted to avoid an
1303 accidental token paste for output. For simplicity, it is
1304 conservative, and occasionally advises a space where one is not
1305 needed, e.g. "." and ".2". */
93c80368 1306int
6cf87ca4
ZW
1307cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1308 const cpp_token *token2)
c5a04734 1309{
93c80368
NB
1310 enum cpp_ttype a = token1->type, b = token2->type;
1311 cppchar_t c;
c5a04734 1312
93c80368
NB
1313 if (token1->flags & NAMED_OP)
1314 a = CPP_NAME;
1315 if (token2->flags & NAMED_OP)
1316 b = CPP_NAME;
c5a04734 1317
93c80368
NB
1318 c = EOF;
1319 if (token2->flags & DIGRAPH)
37b8524c 1320 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
1321 else if (token_spellings[b].category == SPELL_OPERATOR)
1322 c = token_spellings[b].name[0];
c5a04734 1323
93c80368 1324 /* Quickly get everything that can paste with an '='. */
37b8524c 1325 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 1326 return 1;
c5a04734 1327
93c80368 1328 switch (a)
c5a04734 1329 {
93c80368
NB
1330 case CPP_GREATER: return c == '>' || c == '?';
1331 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1332 case CPP_PLUS: return c == '+';
1333 case CPP_MINUS: return c == '-' || c == '>';
1334 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1335 case CPP_MOD: return c == ':' || c == '>';
1336 case CPP_AND: return c == '&';
1337 case CPP_OR: return c == '|';
1338 case CPP_COLON: return c == ':' || c == '>';
1339 case CPP_DEREF: return c == '*';
26ec42ee 1340 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
1341 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1342 case CPP_NAME: return ((b == CPP_NUMBER
1343 && name_p (pfile, &token2->val.str))
1344 || b == CPP_NAME
1345 || b == CPP_CHAR || b == CPP_STRING); /* L */
1346 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1347 || c == '.' || c == '+' || c == '-');
1613e52b 1348 /* UCNs */
1067694a
NB
1349 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1350 && b == CPP_NAME)
1613e52b 1351 || (CPP_OPTION (pfile, objc)
1067694a 1352 && token1->val.str.text[0] == '@'
1613e52b 1353 && (b == CPP_NAME || b == CPP_STRING)));
93c80368 1354 default: break;
c5a04734 1355 }
c5a04734 1356
417f3e3a 1357 return 0;
c5a04734
ZW
1358}
1359
93c80368 1360/* Output all the remaining tokens on the current line, and a newline
4ed5bcfb
NB
1361 character, to FP. Leading whitespace is removed. If there are
1362 macros, special token padding is not performed. */
c5a04734 1363void
6cf87ca4 1364cpp_output_line (cpp_reader *pfile, FILE *fp)
c5a04734 1365{
4ed5bcfb 1366 const cpp_token *token;
96be6998 1367
4ed5bcfb
NB
1368 token = cpp_get_token (pfile);
1369 while (token->type != CPP_EOF)
96be6998 1370 {
4ed5bcfb
NB
1371 cpp_output_token (token, fp);
1372 token = cpp_get_token (pfile);
1373 if (token->flags & PREV_WHITE)
1374 putc (' ', fp);
96be6998
ZW
1375 }
1376
93c80368 1377 putc ('\n', fp);
041c3194 1378}
c5a04734 1379
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).  */

/* Smallest payload size, in bytes, that new_buff will allocate.  */
#define MIN_BUFF_SIZE 8000

/* Largest free buffer that _cpp_get_buff will hand out for a request
   of MIN_SIZE bytes.  */
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)

/* Size to request when extending BUFF: MIN_EXTRA bytes plus twice the
   number of bytes between BUFF's cur and limit pointers.  Every macro
   argument is parenthesized so that an argument which is itself an
   expression cannot be regrouped by operator precedence.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
  ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1394
c9e7a609
NB
1395/* Create a new allocation buffer. Place the control block at the end
1396 of the buffer, so that buffer overflows will cause immediate chaos. */
b8af0ca5 1397static _cpp_buff *
6cf87ca4 1398new_buff (size_t len)
b8af0ca5
NB
1399{
1400 _cpp_buff *result;
ece54d54 1401 unsigned char *base;
b8af0ca5 1402
1e013d2e
NB
1403 if (len < MIN_BUFF_SIZE)
1404 len = MIN_BUFF_SIZE;
c70f6ed3 1405 len = CPP_ALIGN (len);
b8af0ca5
NB
1406
1407 base = xmalloc (len + sizeof (_cpp_buff));
1408 result = (_cpp_buff *) (base + len);
1409 result->base = base;
1410 result->cur = base;
1411 result->limit = base + len;
1412 result->next = NULL;
1413 return result;
1414}
1415
1416/* Place a chain of unwanted allocation buffers on the free list. */
1417void
6cf87ca4 1418_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
b8af0ca5
NB
1419{
1420 _cpp_buff *end = buff;
1421
1422 while (end->next)
1423 end = end->next;
1424 end->next = pfile->free_buffs;
1425 pfile->free_buffs = buff;
1426}
1427
1428/* Return a free buffer of size at least MIN_SIZE. */
1429_cpp_buff *
6cf87ca4 1430_cpp_get_buff (cpp_reader *pfile, size_t min_size)
b8af0ca5
NB
1431{
1432 _cpp_buff *result, **p;
1433
1434 for (p = &pfile->free_buffs;; p = &(*p)->next)
1435 {
6142088c 1436 size_t size;
1e013d2e
NB
1437
1438 if (*p == NULL)
b8af0ca5 1439 return new_buff (min_size);
1e013d2e
NB
1440 result = *p;
1441 size = result->limit - result->base;
1442 /* Return a buffer that's big enough, but don't waste one that's
1443 way too big. */
34f5271d 1444 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
b8af0ca5
NB
1445 break;
1446 }
1447
1448 *p = result->next;
1449 result->next = NULL;
1450 result->cur = result->base;
1451 return result;
1452}
1453
4fe9b91c 1454/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
1455 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1456 the excess bytes to the new buffer. Chains the new buffer after
1457 BUFF, and returns the new buffer. */
b8af0ca5 1458_cpp_buff *
6cf87ca4 1459_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
b8af0ca5 1460{
6142088c 1461 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
8c3b2693 1462 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
b8af0ca5 1463
8c3b2693
NB
1464 buff->next = new_buff;
1465 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1466 return new_buff;
1467}
1468
4fe9b91c 1469/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
1470 remaining bytes of the buffer pointed to by BUFF, and at least
1471 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1472 Chains the new buffer before the buffer pointed to by BUFF, and
1473 updates the pointer to point to the new buffer. */
1474void
6cf87ca4 1475_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
8c3b2693
NB
1476{
1477 _cpp_buff *new_buff, *old_buff = *pbuff;
1478 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1479
1480 new_buff = _cpp_get_buff (pfile, size);
1481 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1482 new_buff->next = old_buff;
1483 *pbuff = new_buff;
b8af0ca5
NB
1484}
1485
1486/* Free a chain of buffers starting at BUFF. */
1487void
5671bf27 1488_cpp_free_buff (_cpp_buff *buff)
b8af0ca5
NB
1489{
1490 _cpp_buff *next;
1491
1492 for (; buff; buff = next)
1493 {
1494 next = buff->next;
1495 free (buff->base);
1496 }
1497}
417f3e3a 1498
ece54d54
NB
1499/* Allocate permanent, unaligned storage of length LEN. */
1500unsigned char *
6cf87ca4 1501_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
ece54d54
NB
1502{
1503 _cpp_buff *buff = pfile->u_buff;
1504 unsigned char *result = buff->cur;
1505
1506 if (len > (size_t) (buff->limit - result))
1507 {
1508 buff = _cpp_get_buff (pfile, len);
1509 buff->next = pfile->u_buff;
1510 pfile->u_buff = buff;
1511 result = buff->cur;
1512 }
1513
1514 buff->cur = result + len;
1515 return result;
1516}
1517
87062813
NB
1518/* Allocate permanent, unaligned storage of length LEN from a_buff.
1519 That buffer is used for growing allocations when saving macro
1520 replacement lists in a #define, and when parsing an answer to an
1521 assertion in #assert, #unassert or #if (and therefore possibly
1522 whilst expanding macros). It therefore must not be used by any
1523 code that they might call: specifically the lexer and the guts of
1524 the macro expander.
1525
1526 All existing other uses clearly fit this restriction: storing
1527 registered pragmas during initialization. */
93c80368 1528unsigned char *
6cf87ca4 1529_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
3fef5b2b 1530{
8c3b2693
NB
1531 _cpp_buff *buff = pfile->a_buff;
1532 unsigned char *result = buff->cur;
3fef5b2b 1533
8c3b2693 1534 if (len > (size_t) (buff->limit - result))
3fef5b2b 1535 {
8c3b2693
NB
1536 buff = _cpp_get_buff (pfile, len);
1537 buff->next = pfile->a_buff;
1538 pfile->a_buff = buff;
1539 result = buff->cur;
3fef5b2b 1540 }
041c3194 1541
8c3b2693 1542 buff->cur = result + len;
93c80368 1543 return result;
041c3194 1544}
This page took 1.166302 seconds and 5 git commands to generate.