]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
solaris_threads.c (MAX_ORIG_STACK_SIZE): Provide special Solaris 2/Intel definition.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
93c80368
NB
23/* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
27
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
041c3194 36
45b966db
ZW
37#include "config.h"
38#include "system.h"
45b966db
ZW
39#include "cpplib.h"
40#include "cpphash.h"
41
c8a96070
NB
42/* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
45#ifdef CROSS_COMPILE
46#undef MULTIBYTE_CHARS
47#endif
48
49#ifdef MULTIBYTE_CHARS
50#include "mbchar.h"
51#include <locale.h>
52#endif
53
93c80368
NB
/* The ways in which a token's spelling can be obtained.  Tokens with
   SPELL_STRING store their spelling in the token list, and its
   length in the token->val.name.len.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR = 1,
  SPELL_IDENT = 2,
  SPELL_STRING = 3,
  SPELL_NONE = 4
};

/* One entry of the token spelling table.  */
struct token_spelling
{
  /* Spelling category of the token type.  */
  enum spell_type category;

  /* Name recorded for the token type.  */
  const unsigned char *name;
};
70
93c80368
NB
71const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
72 U":>", U"<%", U"%>"};
73
74#define OP(e, s) { SPELL_OPERATOR, U s },
75#define TK(e, s) { s, U STRINGX (e) },
76const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
77#undef OP
78#undef TK
79
80#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
f2d5f0cc 82
1444f2ed 83static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
29401c30
NB
84static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
85static cppchar_t get_effective_char PARAMS ((cpp_reader *));
0d9f234d 86
041c3194 87static int skip_block_comment PARAMS ((cpp_reader *));
cbcff6df 88static int skip_line_comment PARAMS ((cpp_reader *));
0d9f234d
NB
89static void adjust_column PARAMS ((cpp_reader *));
90static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
2c3fcba6
ZW
91static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
92static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
93 const U_CHAR *));
93c80368
NB
94static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
95static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
0d9f234d 96static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
93c80368 97static void unterminated PARAMS ((cpp_reader *, int));
0d9f234d
NB
98static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
99static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
29401c30 100static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
cbcff6df 101static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
93c80368 102static int name_p PARAMS ((cpp_reader *, const cpp_string *));
62729350
NB
103static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
104 const unsigned char *, unsigned int *));
5fddcffc 105static tokenrun *next_tokenrun PARAMS ((tokenrun *));
f617b8e2 106
93c80368
NB
107static cpp_chunk *new_chunk PARAMS ((unsigned int));
108static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
c8a96070 109static unsigned int hex_digit_value PARAMS ((unsigned int));
15dad1d9 110
041c3194 111/* Utility routine:
9e62c811 112
bfb9dc7f
ZW
113 Compares, the token TOKEN to the NUL-terminated string STRING.
114 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
15dad1d9 115
041c3194 116int
bfb9dc7f
ZW
117cpp_ideq (token, string)
118 const cpp_token *token;
041c3194
ZW
119 const char *string;
120{
bfb9dc7f 121 if (token->type != CPP_NAME)
041c3194 122 return 0;
bfb9dc7f 123
a28c5035 124 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
15dad1d9 125}
1368ee70 126
0d9f234d
NB
127/* Call when meeting a newline. Returns the character after the newline
128 (or carriage-return newline combination), or EOF. */
129static cppchar_t
1444f2ed
NB
130handle_newline (pfile, newline_char)
131 cpp_reader *pfile;
0d9f234d
NB
132 cppchar_t newline_char;
133{
1444f2ed 134 cpp_buffer *buffer;
0d9f234d
NB
135 cppchar_t next = EOF;
136
1444f2ed 137 pfile->line++;
1444f2ed 138 buffer = pfile->buffer;
0d9f234d 139 buffer->col_adjust = 0;
0d9f234d
NB
140 buffer->line_base = buffer->cur;
141
142 /* Handle CR-LF and LF-CR combinations, get the next character. */
143 if (buffer->cur < buffer->rlimit)
144 {
145 next = *buffer->cur++;
146 if (next + newline_char == '\r' + '\n')
147 {
148 buffer->line_base = buffer->cur;
149 if (buffer->cur < buffer->rlimit)
150 next = *buffer->cur++;
151 else
152 next = EOF;
153 }
154 }
155
156 buffer->read_ahead = next;
157 return next;
158}
159
160/* Subroutine of skip_escaped_newlines; called when a trigraph is
161 encountered. It warns if necessary, and returns true if the
162 trigraph should be honoured. FROM_CHAR is the third character of a
163 trigraph, and presumed to be the previous character for position
164 reporting. */
45b966db 165static int
0d9f234d 166trigraph_ok (pfile, from_char)
45b966db 167 cpp_reader *pfile;
0d9f234d 168 cppchar_t from_char;
45b966db 169{
041c3194
ZW
170 int accept = CPP_OPTION (pfile, trigraphs);
171
cbcff6df
NB
172 /* Don't warn about trigraphs in comments. */
173 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
45b966db 174 {
0d9f234d 175 cpp_buffer *buffer = pfile->buffer;
67821e3a 176
041c3194 177 if (accept)
67821e3a 178 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
041c3194 179 "trigraph ??%c converted to %c",
0d9f234d
NB
180 (int) from_char,
181 (int) _cpp_trigraph_map[from_char]);
4a5b68a2
NB
182 else if (buffer->cur != buffer->last_Wtrigraphs)
183 {
184 buffer->last_Wtrigraphs = buffer->cur;
67821e3a 185 cpp_warning_with_line (pfile, pfile->line,
4a5b68a2
NB
186 CPP_BUF_COL (buffer) - 2,
187 "trigraph ??%c ignored", (int) from_char);
188 }
45b966db 189 }
0d9f234d 190
041c3194 191 return accept;
45b966db
ZW
192}
193
0d9f234d
NB
/* Set the type of the token being lexed to T and clear the
   read-ahead character.  Assumes local variables buffer and
   result.  */
#define ACCEPT_CHAR(t) \
  do \
    { \
      result->type = t; \
      buffer->read_ahead = EOF; \
    } \
  while (0)

/* Remember, and later restore, the current position in the buffer.
   Both assume local variables buffer and saved_cur.

   When we move to multibyte character sets, add to these something
   that saves and restores the state of the multibyte conversion
   library.  This probably involves saving and restoring a "cookie".
   In the case of glibc it is an 8-byte structure, so is not a high
   overhead operation.  In any case, it's out of the fast path.  */
#define SAVE_STATE() \
  do { saved_cur = buffer->cur; } while (0)
#define RESTORE_STATE() \
  do { buffer->cur = saved_cur; } while (0)
205
206/* Skips any escaped newlines introduced by NEXT, which is either a
207 '?' or a '\\'. Returns the next character, which will also have
a5c3cccd
NB
208 been placed in buffer->read_ahead. This routine performs
209 preprocessing stages 1 and 2 of the ISO C standard. */
0d9f234d 210static cppchar_t
29401c30
NB
211skip_escaped_newlines (pfile, next)
212 cpp_reader *pfile;
0d9f234d 213 cppchar_t next;
45b966db 214{
29401c30
NB
215 cpp_buffer *buffer = pfile->buffer;
216
a5c3cccd
NB
217 /* Only do this if we apply stages 1 and 2. */
218 if (!buffer->from_stage3)
041c3194 219 {
a5c3cccd
NB
220 cppchar_t next1;
221 const unsigned char *saved_cur;
222 int space;
223
224 do
0d9f234d 225 {
a5c3cccd
NB
226 if (buffer->cur == buffer->rlimit)
227 break;
228
229 SAVE_STATE ();
230 if (next == '?')
0d9f234d 231 {
a5c3cccd
NB
232 next1 = *buffer->cur++;
233 if (next1 != '?' || buffer->cur == buffer->rlimit)
234 {
235 RESTORE_STATE ();
236 break;
237 }
238
239 next1 = *buffer->cur++;
240 if (!_cpp_trigraph_map[next1]
29401c30 241 || !trigraph_ok (pfile, next1))
a5c3cccd
NB
242 {
243 RESTORE_STATE ();
244 break;
245 }
246
247 /* We have a full trigraph here. */
248 next = _cpp_trigraph_map[next1];
249 if (next != '\\' || buffer->cur == buffer->rlimit)
250 break;
251 SAVE_STATE ();
252 }
253
254 /* We have a backslash, and room for at least one more character. */
255 space = 0;
256 do
257 {
258 next1 = *buffer->cur++;
259 if (!is_nvspace (next1))
260 break;
261 space = 1;
0d9f234d 262 }
a5c3cccd 263 while (buffer->cur < buffer->rlimit);
041c3194 264
a5c3cccd 265 if (!is_vspace (next1))
0d9f234d
NB
266 {
267 RESTORE_STATE ();
268 break;
269 }
45b966db 270
29401c30
NB
271 if (space && !pfile->state.lexing_comment)
272 cpp_warning (pfile, "backslash and newline separated by space");
0d9f234d 273
29401c30 274 next = handle_newline (pfile, next1);
a5c3cccd 275 if (next == EOF)
29401c30 276 cpp_pedwarn (pfile, "backslash-newline at end of file");
0d9f234d 277 }
a5c3cccd 278 while (next == '\\' || next == '?');
041c3194 279 }
45b966db 280
0d9f234d
NB
281 buffer->read_ahead = next;
282 return next;
45b966db
ZW
283}
284
0d9f234d
NB
285/* Obtain the next character, after trigraph conversion and skipping
286 an arbitrary string of escaped newlines. The common case of no
287 trigraphs or escaped newlines falls through quickly. */
288static cppchar_t
29401c30
NB
289get_effective_char (pfile)
290 cpp_reader *pfile;
64aaf407 291{
29401c30 292 cpp_buffer *buffer = pfile->buffer;
0d9f234d
NB
293 cppchar_t next = EOF;
294
295 if (buffer->cur < buffer->rlimit)
296 {
297 next = *buffer->cur++;
298
299 /* '?' can introduce trigraphs (and therefore backslash); '\\'
300 can introduce escaped newlines, which we want to skip, or
301 UCNs, which, depending upon lexer state, we will handle in
302 the future. */
303 if (next == '?' || next == '\\')
29401c30 304 next = skip_escaped_newlines (pfile, next);
0d9f234d
NB
305 }
306
307 buffer->read_ahead = next;
308 return next;
64aaf407
NB
309}
310
0d9f234d
NB
311/* Skip a C-style block comment. We find the end of the comment by
312 seeing if an asterisk is before every '/' we encounter. Returns
313 non-zero if comment terminated by EOF, zero otherwise. */
041c3194
ZW
314static int
315skip_block_comment (pfile)
45b966db
ZW
316 cpp_reader *pfile;
317{
041c3194 318 cpp_buffer *buffer = pfile->buffer;
d8090680 319 cppchar_t c = EOF, prevc = EOF;
0d9f234d 320
cbcff6df 321 pfile->state.lexing_comment = 1;
0d9f234d 322 while (buffer->cur != buffer->rlimit)
45b966db 323 {
0d9f234d
NB
324 prevc = c, c = *buffer->cur++;
325
326 next_char:
327 /* FIXME: For speed, create a new character class of characters
93c80368 328 of interest inside block comments. */
0d9f234d 329 if (c == '?' || c == '\\')
29401c30 330 c = skip_escaped_newlines (pfile, c);
041c3194 331
0d9f234d
NB
332 /* People like decorating comments with '*', so check for '/'
333 instead for efficiency. */
041c3194 334 if (c == '/')
45b966db 335 {
0d9f234d
NB
336 if (prevc == '*')
337 break;
041c3194 338
0d9f234d
NB
339 /* Warn about potential nested comments, but not if the '/'
340 comes immediately before the true comment delimeter.
041c3194 341 Don't bother to get it right across escaped newlines. */
0d9f234d
NB
342 if (CPP_OPTION (pfile, warn_comments)
343 && buffer->cur != buffer->rlimit)
45b966db 344 {
0d9f234d
NB
345 prevc = c, c = *buffer->cur++;
346 if (c == '*' && buffer->cur != buffer->rlimit)
347 {
348 prevc = c, c = *buffer->cur++;
349 if (c != '/')
67821e3a
NB
350 cpp_warning_with_line (pfile, pfile->line,
351 CPP_BUF_COL (buffer) - 2,
0d9f234d
NB
352 "\"/*\" within comment");
353 }
354 goto next_char;
45b966db 355 }
45b966db 356 }
91fcd158 357 else if (is_vspace (c))
45b966db 358 {
1444f2ed 359 prevc = c, c = handle_newline (pfile, c);
0d9f234d 360 goto next_char;
45b966db 361 }
52fadca8 362 else if (c == '\t')
0d9f234d 363 adjust_column (pfile);
45b966db 364 }
041c3194 365
cbcff6df 366 pfile->state.lexing_comment = 0;
0d9f234d
NB
367 buffer->read_ahead = EOF;
368 return c != '/' || prevc != '*';
45b966db
ZW
369}
370
f9a0e96c 371/* Skip a C++ line comment. Handles escaped newlines. Returns
0d9f234d
NB
372 non-zero if a multiline comment. The following new line, if any,
373 is left in buffer->read_ahead. */
041c3194 374static int
cbcff6df
NB
375skip_line_comment (pfile)
376 cpp_reader *pfile;
45b966db 377{
cbcff6df 378 cpp_buffer *buffer = pfile->buffer;
67821e3a 379 unsigned int orig_line = pfile->line;
0d9f234d 380 cppchar_t c;
041c3194 381
cbcff6df 382 pfile->state.lexing_comment = 1;
0d9f234d 383 do
041c3194 384 {
0d9f234d
NB
385 c = EOF;
386 if (buffer->cur == buffer->rlimit)
387 break;
041c3194 388
0d9f234d
NB
389 c = *buffer->cur++;
390 if (c == '?' || c == '\\')
29401c30 391 c = skip_escaped_newlines (pfile, c);
041c3194 392 }
0d9f234d 393 while (!is_vspace (c));
45b966db 394
cbcff6df 395 pfile->state.lexing_comment = 0;
0d9f234d 396 buffer->read_ahead = c; /* Leave any newline for caller. */
67821e3a 397 return orig_line != pfile->line;
041c3194 398}
45b966db 399
0d9f234d
NB
400/* pfile->buffer->cur is one beyond the \t character. Update
401 col_adjust so we track the column correctly. */
52fadca8 402static void
0d9f234d 403adjust_column (pfile)
52fadca8 404 cpp_reader *pfile;
52fadca8 405{
0d9f234d
NB
406 cpp_buffer *buffer = pfile->buffer;
407 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
52fadca8
NB
408
409 /* Round it up to multiple of the tabstop, but subtract 1 since the
410 tab itself occupies a character position. */
0d9f234d
NB
411 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
412 - col % CPP_OPTION (pfile, tabstop)) - 1;
52fadca8
NB
413}
414
0d9f234d
NB
415/* Skips whitespace, saving the next non-whitespace character.
416 Adjusts pfile->col_adjust to account for tabs. Without this,
417 tokens might be assigned an incorrect column. */
041c3194 418static void
0d9f234d 419skip_whitespace (pfile, c)
041c3194 420 cpp_reader *pfile;
0d9f234d 421 cppchar_t c;
041c3194
ZW
422{
423 cpp_buffer *buffer = pfile->buffer;
0d9f234d 424 unsigned int warned = 0;
45b966db 425
0d9f234d 426 do
041c3194 427 {
91fcd158
NB
428 /* Horizontal space always OK. */
429 if (c == ' ')
0d9f234d 430 ;
91fcd158 431 else if (c == '\t')
0d9f234d
NB
432 adjust_column (pfile);
433 /* Just \f \v or \0 left. */
91fcd158 434 else if (c == '\0')
041c3194 435 {
91fcd158 436 if (!warned)
0d9f234d
NB
437 {
438 cpp_warning (pfile, "null character(s) ignored");
439 warned = 1;
440 }
45b966db 441 }
93c80368 442 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
67821e3a 443 cpp_pedwarn_with_line (pfile, pfile->line,
91fcd158
NB
444 CPP_BUF_COL (buffer),
445 "%s in preprocessing directive",
446 c == '\f' ? "form feed" : "vertical tab");
0d9f234d
NB
447
448 c = EOF;
449 if (buffer->cur == buffer->rlimit)
450 break;
451 c = *buffer->cur++;
45b966db 452 }
ec5c56db 453 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
0d9f234d
NB
454 while (is_nvspace (c));
455
456 /* Remember the next character. */
457 buffer->read_ahead = c;
041c3194 458}
45b966db 459
93c80368
NB
460/* See if the characters of a number token are valid in a name (no
461 '.', '+' or '-'). */
462static int
463name_p (pfile, string)
464 cpp_reader *pfile;
465 const cpp_string *string;
466{
467 unsigned int i;
468
469 for (i = 0; i < string->len; i++)
470 if (!is_idchar (string->text[i]))
471 return 0;
472
473 return 1;
474}
475
2c3fcba6
ZW
476/* Parse an identifier, skipping embedded backslash-newlines. This is
477 a critical inner loop. The common case is an identifier which has
478 not been split by backslash-newline, does not contain a dollar
479 sign, and has already been scanned (roughly 10:1 ratio of
480 seen:unseen identifiers in normal code; the distribution is
481 Poisson-like). Second most common case is a new identifier, not
482 split and no dollar sign. The other possibilities are rare and
483 have been relegated to parse_identifier_slow. */
0d9f234d
NB
484
485static cpp_hashnode *
2c3fcba6 486parse_identifier (pfile)
45b966db 487 cpp_reader *pfile;
45b966db 488{
93c80368 489 cpp_hashnode *result;
2c3fcba6
ZW
490 const U_CHAR *cur, *rlimit;
491
492 /* Fast-path loop. Skim over a normal identifier.
493 N.B. ISIDNUM does not include $. */
494 cur = pfile->buffer->cur - 1;
495 rlimit = pfile->buffer->rlimit;
496 do
497 cur++;
498 while (cur < rlimit && ISIDNUM (*cur));
499
500 /* Check for slow-path cases. */
501 if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
502 result = parse_identifier_slow (pfile, cur);
503 else
504 {
505 const U_CHAR *base = pfile->buffer->cur - 1;
506 result = (cpp_hashnode *)
507 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
508 pfile->buffer->cur = cur;
509 }
510
511 /* Rarely, identifiers require diagnostics when lexed.
512 XXX Has to be forced out of the fast path. */
513 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
514 && !pfile->state.skipping, 0))
515 {
516 /* It is allowed to poison the same identifier twice. */
517 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
518 cpp_error (pfile, "attempt to use poisoned \"%s\"",
519 NODE_NAME (result));
520
521 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
522 replacement list of a variadic macro. */
523 if (result == pfile->spec_nodes.n__VA_ARGS__
524 && !pfile->state.va_args_ok)
525 cpp_pedwarn (pfile,
526 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
527 }
528
529 return result;
530}
531
532/* Slow path. This handles identifiers which have been split, and
533 identifiers which contain dollar signs. The part of the identifier
534 from PFILE->buffer->cur-1 to CUR has already been scanned. */
535static cpp_hashnode *
536parse_identifier_slow (pfile, cur)
537 cpp_reader *pfile;
538 const U_CHAR *cur;
539{
0d9f234d 540 cpp_buffer *buffer = pfile->buffer;
2c3fcba6 541 const U_CHAR *base = buffer->cur - 1;
2a967f3d 542 struct obstack *stack = &pfile->hash_table->stack;
2c3fcba6
ZW
543 unsigned int c, saw_dollar = 0, len;
544
545 /* Copy the part of the token which is known to be okay. */
546 obstack_grow (stack, base, cur - base);
041c3194 547
2c3fcba6
ZW
548 /* Now process the part which isn't. We are looking at one of
549 '$', '\\', or '?' on entry to this loop. */
550 c = *cur++;
551 buffer->cur = cur;
0d9f234d 552 do
041c3194 553 {
2c3fcba6
ZW
554 while (is_idchar (c))
555 {
556 obstack_1grow (stack, c);
45b966db 557
2c3fcba6
ZW
558 if (c == '$')
559 saw_dollar++;
ba89d661 560
2c3fcba6
ZW
561 c = EOF;
562 if (buffer->cur == buffer->rlimit)
563 break;
ba89d661 564
2c3fcba6
ZW
565 c = *buffer->cur++;
566 }
ba89d661 567
0d9f234d
NB
568 /* Potential escaped newline? */
569 if (c != '?' && c != '\\')
2c3fcba6 570 break;
29401c30 571 c = skip_escaped_newlines (pfile, c);
041c3194 572 }
0d9f234d
NB
573 while (is_idchar (c));
574
93c80368
NB
575 /* Remember the next character. */
576 buffer->read_ahead = c;
577
0d9f234d
NB
578 /* $ is not a identifier character in the standard, but is commonly
579 accepted as an extension. Don't warn about it in skipped
580 conditional blocks. */
cef0d199 581 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
0d9f234d
NB
582 cpp_pedwarn (pfile, "'$' character(s) in identifier");
583
93c80368 584 /* Identifiers are null-terminated. */
2a967f3d
NB
585 len = obstack_object_size (stack);
586 obstack_1grow (stack, '\0');
93c80368 587
2c3fcba6 588 return (cpp_hashnode *)
2a967f3d 589 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
45b966db
ZW
590}
591
0d9f234d 592/* Parse a number, skipping embedded backslash-newlines. */
45b966db 593static void
93c80368 594parse_number (pfile, number, c, leading_period)
45b966db 595 cpp_reader *pfile;
0d9f234d
NB
596 cpp_string *number;
597 cppchar_t c;
93c80368 598 int leading_period;
45b966db 599{
041c3194 600 cpp_buffer *buffer = pfile->buffer;
49fe13f6 601 cpp_pool *pool = &pfile->ident_pool;
93c80368 602 unsigned char *dest, *limit;
45b966db 603
93c80368
NB
604 dest = POOL_FRONT (pool);
605 limit = POOL_LIMIT (pool);
cbcff6df 606
93c80368
NB
607 /* Place a leading period. */
608 if (leading_period)
609 {
610 if (dest >= limit)
611 limit = _cpp_next_chunk (pool, 0, &dest);
612 *dest++ = '.';
613 }
614
0d9f234d 615 do
041c3194 616 {
0d9f234d
NB
617 do
618 {
93c80368
NB
619 /* Need room for terminating null. */
620 if (dest + 1 >= limit)
621 limit = _cpp_next_chunk (pool, 0, &dest);
622 *dest++ = c;
0d9f234d 623
0d9f234d
NB
624 c = EOF;
625 if (buffer->cur == buffer->rlimit)
626 break;
45b966db 627
0d9f234d
NB
628 c = *buffer->cur++;
629 }
93c80368 630 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
45b966db 631
0d9f234d
NB
632 /* Potential escaped newline? */
633 if (c != '?' && c != '\\')
634 break;
29401c30 635 c = skip_escaped_newlines (pfile, c);
45b966db 636 }
93c80368 637 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
cbcff6df 638
0d9f234d
NB
639 /* Remember the next character. */
640 buffer->read_ahead = c;
64aaf407 641
93c80368
NB
642 /* Null-terminate the number. */
643 *dest = '\0';
644
645 number->text = POOL_FRONT (pool);
646 number->len = dest - number->text;
647 POOL_COMMIT (pool, number->len + 1);
0d9f234d
NB
648}
649
650/* Subroutine of parse_string. Emits error for unterminated strings. */
651static void
93c80368 652unterminated (pfile, term)
0d9f234d 653 cpp_reader *pfile;
0d9f234d
NB
654 int term;
655{
656 cpp_error (pfile, "missing terminating %c character", term);
657
50410426 658 if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
041c3194 659 {
50410426 660 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
0d9f234d 661 "possible start of unterminated string literal");
50410426 662 pfile->mls_line = 0;
041c3194 663 }
45b966db
ZW
664}
665
93c80368
NB
666/* Subroutine of parse_string. */
667static int
668unescaped_terminator_p (pfile, dest)
669 cpp_reader *pfile;
670 const unsigned char *dest;
671{
672 const unsigned char *start, *temp;
673
674 /* In #include-style directives, terminators are not escapeable. */
675 if (pfile->state.angled_headers)
676 return 1;
677
49fe13f6 678 start = POOL_FRONT (&pfile->ident_pool);
93c80368
NB
679
680 /* An odd number of consecutive backslashes represents an escaped
681 terminator. */
682 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
683 ;
684
685 return ((dest - temp) & 1) == 0;
686}
687
0d9f234d 688/* Parses a string, character constant, or angle-bracketed header file
7868b4a2
NB
689 name. Handles embedded trigraphs and escaped newlines. The stored
690 string is guaranteed NUL-terminated, but it is not guaranteed that
691 this is the first NUL since embedded NULs are preserved.
45b966db 692
7868b4a2 693 Multi-line strings are allowed, but they are deprecated. */
041c3194 694static void
0d9f234d 695parse_string (pfile, token, terminator)
45b966db 696 cpp_reader *pfile;
041c3194 697 cpp_token *token;
0d9f234d 698 cppchar_t terminator;
45b966db 699{
041c3194 700 cpp_buffer *buffer = pfile->buffer;
49fe13f6 701 cpp_pool *pool = &pfile->ident_pool;
93c80368 702 unsigned char *dest, *limit;
0d9f234d 703 cppchar_t c;
d82fc108 704 bool warned_nulls = false, warned_multi = false;
0d9f234d 705
93c80368
NB
706 dest = POOL_FRONT (pool);
707 limit = POOL_LIMIT (pool);
708
0d9f234d 709 for (;;)
45b966db 710 {
0d9f234d 711 if (buffer->cur == buffer->rlimit)
7868b4a2
NB
712 c = EOF;
713 else
714 c = *buffer->cur++;
715
716 have_char:
717 /* We need space for the terminating NUL. */
718 if (dest >= limit)
719 limit = _cpp_next_chunk (pool, 0, &dest);
720
721 if (c == EOF)
0d9f234d 722 {
93c80368 723 unterminated (pfile, terminator);
0d9f234d
NB
724 break;
725 }
0d9f234d 726
0d9f234d
NB
727 /* Handle trigraphs, escaped newlines etc. */
728 if (c == '?' || c == '\\')
29401c30 729 c = skip_escaped_newlines (pfile, c);
45b966db 730
93c80368 731 if (c == terminator && unescaped_terminator_p (pfile, dest))
45b966db 732 {
93c80368
NB
733 c = EOF;
734 break;
0d9f234d
NB
735 }
736 else if (is_vspace (c))
737 {
738 /* In assembly language, silently terminate string and
739 character literals at end of line. This is a kludge
740 around not knowing where comments are. */
bdb05a7b 741 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
0d9f234d 742 break;
45b966db 743
0d9f234d
NB
744 /* Character constants and header names may not extend over
745 multiple lines. In Standard C, neither may strings.
746 Unfortunately, we accept multiline strings as an
16eb2788
NB
747 extension, except in #include family directives. */
748 if (terminator != '"' || pfile->state.angled_headers)
45b966db 749 {
93c80368 750 unterminated (pfile, terminator);
0d9f234d 751 break;
45b966db 752 }
45b966db 753
d82fc108
NB
754 if (!warned_multi)
755 {
756 warned_multi = true;
757 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
758 }
759
50410426
NB
760 if (pfile->mls_line == 0)
761 {
762 pfile->mls_line = token->line;
763 pfile->mls_col = token->col;
764 }
0d9f234d 765
1444f2ed 766 c = handle_newline (pfile, c);
7868b4a2
NB
767 *dest++ = '\n';
768 goto have_char;
0d9f234d 769 }
d82fc108 770 else if (c == '\0' && !warned_nulls)
0d9f234d 771 {
d82fc108
NB
772 warned_nulls = true;
773 cpp_warning (pfile, "null character(s) preserved in literal");
45b966db 774 }
45b966db 775
93c80368 776 *dest++ = c;
45b966db
ZW
777 }
778
93c80368 779 /* Remember the next character. */
0d9f234d 780 buffer->read_ahead = c;
7868b4a2 781 *dest = '\0';
45b966db 782
93c80368
NB
783 token->val.str.text = POOL_FRONT (pool);
784 token->val.str.len = dest - token->val.str.text;
7868b4a2 785 POOL_COMMIT (pool, token->val.str.len + 1);
0d9f234d 786}
041c3194 787
93c80368 788/* The stored comment includes the comment start and any terminator. */
9e62c811 789static void
0d9f234d
NB
790save_comment (pfile, token, from)
791 cpp_reader *pfile;
041c3194
ZW
792 cpp_token *token;
793 const unsigned char *from;
9e62c811 794{
041c3194 795 unsigned char *buffer;
0d9f234d 796 unsigned int len;
0d9f234d 797
1c6d33ef 798 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
3542203b
NB
799 /* C++ comments probably (not definitely) have moved past a new
800 line, which we don't want to save in the comment. */
801 if (pfile->buffer->read_ahead != EOF)
802 len--;
49fe13f6 803 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
041c3194 804
041c3194 805 token->type = CPP_COMMENT;
bfb9dc7f 806 token->val.str.len = len;
0d9f234d 807 token->val.str.text = buffer;
45b966db 808
1c6d33ef
NB
809 buffer[0] = '/';
810 memcpy (buffer + 1, from, len - 1);
0d9f234d 811}
45b966db 812
14baae01 813/* Subroutine of _cpp_lex_direct to handle '%'. A little tricky, since we
cbcff6df 814 want to avoid stepping back when lexing %:%X. */
0d9f234d 815static void
29401c30
NB
816lex_percent (pfile, result)
817 cpp_reader *pfile;
0d9f234d 818 cpp_token *result;
0d9f234d 819{
29401c30 820 cpp_buffer *buffer= pfile->buffer;
cbcff6df
NB
821 cppchar_t c;
822
823 result->type = CPP_MOD;
824 /* Parsing %:%X could leave an extra character. */
825 if (buffer->extra_char == EOF)
29401c30 826 c = get_effective_char (pfile);
cbcff6df
NB
827 else
828 {
829 c = buffer->read_ahead = buffer->extra_char;
830 buffer->extra_char = EOF;
831 }
832
833 if (c == '=')
834 ACCEPT_CHAR (CPP_MOD_EQ);
29401c30 835 else if (CPP_OPTION (pfile, digraphs))
cbcff6df
NB
836 {
837 if (c == ':')
838 {
839 result->flags |= DIGRAPH;
840 ACCEPT_CHAR (CPP_HASH);
29401c30 841 if (get_effective_char (pfile) == '%')
cbcff6df 842 {
29401c30 843 buffer->extra_char = get_effective_char (pfile);
cbcff6df
NB
844 if (buffer->extra_char == ':')
845 {
846 buffer->extra_char = EOF;
847 ACCEPT_CHAR (CPP_PASTE);
848 }
849 else
850 /* We'll catch the extra_char when we're called back. */
851 buffer->read_ahead = '%';
852 }
853 }
854 else if (c == '>')
855 {
856 result->flags |= DIGRAPH;
857 ACCEPT_CHAR (CPP_CLOSE_BRACE);
858 }
859 }
860}
861
14baae01 862/* Subroutine of _cpp_lex_direct to handle '.'. This is tricky, since we
cbcff6df
NB
863 want to avoid stepping back when lexing '...' or '.123'. In the
864 latter case we should also set a flag for parse_number. */
865static void
866lex_dot (pfile, result)
867 cpp_reader *pfile;
868 cpp_token *result;
869{
870 cpp_buffer *buffer = pfile->buffer;
871 cppchar_t c;
872
873 /* Parsing ..X could leave an extra character. */
874 if (buffer->extra_char == EOF)
29401c30 875 c = get_effective_char (pfile);
cbcff6df
NB
876 else
877 {
878 c = buffer->read_ahead = buffer->extra_char;
879 buffer->extra_char = EOF;
880 }
0d9f234d 881
cbcff6df
NB
882 /* All known character sets have 0...9 contiguous. */
883 if (c >= '0' && c <= '9')
884 {
885 result->type = CPP_NUMBER;
93c80368 886 parse_number (pfile, &result->val.str, c, 1);
cbcff6df 887 }
041c3194 888 else
ea4a453b 889 {
cbcff6df
NB
890 result->type = CPP_DOT;
891 if (c == '.')
892 {
29401c30 893 buffer->extra_char = get_effective_char (pfile);
cbcff6df
NB
894 if (buffer->extra_char == '.')
895 {
896 buffer->extra_char = EOF;
897 ACCEPT_CHAR (CPP_ELLIPSIS);
898 }
899 else
900 /* We'll catch the extra_char when we're called back. */
901 buffer->read_ahead = '.';
902 }
903 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
904 ACCEPT_CHAR (CPP_DOT_STAR);
ea4a453b 905 }
45b966db
ZW
906}
907
5fddcffc
NB
908/* Allocate COUNT tokens for RUN. */
909void
910_cpp_init_tokenrun (run, count)
911 tokenrun *run;
912 unsigned int count;
913{
914 run->base = xnewvec (cpp_token, count);
915 run->limit = run->base + count;
916 run->next = NULL;
917}
918
919/* Returns the next tokenrun, or creates one if there is none. */
920static tokenrun *
921next_tokenrun (run)
922 tokenrun *run;
923{
924 if (run->next == NULL)
925 {
926 run->next = xnew (tokenrun);
bdcbe496 927 run->next->prev = run;
5fddcffc
NB
928 _cpp_init_tokenrun (run->next, 250);
929 }
930
931 return run->next;
932}
933
4ed5bcfb
NB
934/* Allocate a single token that is invalidated at the same time as the
935 rest of the tokens on the line. Has its line and col set to the
936 same as the last lexed token, so that diagnostics appear in the
937 right place. */
938cpp_token *
939_cpp_temp_token (pfile)
940 cpp_reader *pfile;
941{
942 cpp_token *old, *result;
943
944 old = pfile->cur_token - 1;
945 if (pfile->cur_token == pfile->cur_run->limit)
946 {
947 pfile->cur_run = next_tokenrun (pfile->cur_run);
948 pfile->cur_token = pfile->cur_run->base;
949 }
950
951 result = pfile->cur_token++;
952 result->line = old->line;
953 result->col = old->col;
954 return result;
955}
956
14baae01
NB
957/* Lex a token into RESULT (external interface). Takes care of issues
958 like directive handling, token lookahead, multiple include
959 opimisation and skipping. */
345894b4
NB
960const cpp_token *
961_cpp_lex_token (pfile)
45b966db 962 cpp_reader *pfile;
5fddcffc 963{
bdcbe496 964 cpp_token *result;
5fddcffc 965
bdcbe496 966 for (;;)
5fddcffc 967 {
bdcbe496 968 if (pfile->cur_token == pfile->cur_run->limit)
5fddcffc 969 {
bdcbe496
NB
970 pfile->cur_run = next_tokenrun (pfile->cur_run);
971 pfile->cur_token = pfile->cur_run->base;
5fddcffc
NB
972 }
973
bdcbe496 974 if (pfile->lookaheads)
14baae01
NB
975 {
976 pfile->lookaheads--;
977 result = pfile->cur_token++;
978 }
bdcbe496 979 else
14baae01 980 result = _cpp_lex_direct (pfile);
bdcbe496
NB
981
982 if (result->flags & BOL)
5fddcffc 983 {
bdcbe496
NB
984 /* Is this a directive. If _cpp_handle_directive returns
985 false, it is an assembler #. */
986 if (result->type == CPP_HASH
987 && !pfile->state.parsing_args
988 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
989 continue;
97293897
NB
990 if (pfile->cb.line_change && !pfile->state.skipping)
991 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
5fddcffc 992 }
5fddcffc 993
bdcbe496
NB
994 /* We don't skip tokens in directives. */
995 if (pfile->state.in_directive)
996 break;
5fddcffc 997
bdcbe496 998 /* Outside a directive, invalidate controlling macros. At file
14baae01 999 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
bdcbe496 1000 get here and MI optimisation works. */
5fddcffc 1001 pfile->mi_valid = false;
bdcbe496
NB
1002
1003 if (!pfile->state.skipping || result->type == CPP_EOF)
1004 break;
5fddcffc
NB
1005 }
1006
345894b4 1007 return result;
5fddcffc
NB
1008}
1009
14baae01
NB
1010/* Lex a token into pfile->cur_token, which is also incremented, to
1011 get diagnostics pointing to the correct location.
1012
1013 Does not handle issues such as token lookahead, multiple-include
1014 optimisation, directives, skipping etc. This function is only
1015 suitable for use by _cpp_lex_token, and in special cases like
1016 lex_expansion_token which doesn't care for any of these issues.
1017
1018 When meeting a newline, returns CPP_EOF if parsing a directive,
1019 otherwise returns to the start of the token buffer if permissible.
1020 Returns the location of the lexed token. */
/* Labels used in the body: fresh_line restarts with the buffer's saved
   flags, update_tokens_line re-stamps the token's line number,
   skipped_white fetches the next character, and trigraph dispatches
   again on the character returned by skip_escaped_newlines.  */
1021cpp_token *
1022_cpp_lex_direct (pfile)
5fddcffc 1023 cpp_reader *pfile;
45b966db 1024{
0d9f234d 1025 cppchar_t c;
adb84b42 1026 cpp_buffer *buffer;
0d9f234d 1027 const unsigned char *comment_start;
/* Claim the next token slot.  */
14baae01 1028 cpp_token *result = pfile->cur_token++;
9ec7291f 1029
5fddcffc 1030 fresh_line:
adb84b42 1031 buffer = pfile->buffer;
bd969772
NB
1032 result->flags = buffer->saved_flags;
1033 buffer->saved_flags = 0;
5fddcffc 1034 update_tokens_line:
1444f2ed 1035 result->line = pfile->line;
041c3194 1036
5fddcffc 1037 skipped_white:
/* Use the pending read-ahead character if there is one, otherwise the
   next unread byte of the buffer.  C stays EOF when neither exists.  */
0d9f234d
NB
1038 c = buffer->read_ahead;
1039 if (c == EOF && buffer->cur < buffer->rlimit)
5fddcffc
NB
1040 c = *buffer->cur++;
1041 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
0d9f234d 1042 buffer->read_ahead = EOF;
5fddcffc
NB
1043
1044 trigraph:
0d9f234d 1045 switch (c)
45b966db 1046 {
0d9f234d 1047 case EOF:
/* No input left in this buffer: flag the next token lexed from it as
   beginning a line.  */
bdcbe496 1048 buffer->saved_flags = BOL;
5fddcffc 1049 if (!pfile->state.parsing_args && !pfile->state.in_directive)
ef6e958a 1050 {
bdcbe496 1051 if (buffer->cur != buffer->line_base)
5fddcffc
NB
1052 {
1053 /* Non-empty files should end in a newline. Don't warn
1054 for command line and _Pragma buffers. */
1055 if (!buffer->from_stage3)
1056 cpp_pedwarn (pfile, "no newline at end of file");
1057 handle_newline (pfile, '\n');
7364fdd8 1058 }
bdcbe496
NB
1059
1060 /* Don't pop the last buffer. */
1061 if (buffer->prev)
1062 {
/* Read the flag before the buffer is popped.  */
1063 unsigned char stop = buffer->return_at_eof;
1064
1065 _cpp_pop_buffer (pfile);
1066 if (!stop)
1067 goto fresh_line;
1068 }
ef6e958a 1069 }
0d9f234d 1070 result->type = CPP_EOF;
5fddcffc 1071 break;
45b966db 1072
0d9f234d
NB
1073 case ' ': case '\t': case '\f': case '\v': case '\0':
1074 skip_whitespace (pfile, c);
1075 result->flags |= PREV_WHITE;
5fddcffc 1076 goto skipped_white;
0d9f234d
NB
1077
1078 case '\n': case '\r':
bdcbe496
NB
1079 handle_newline (pfile, c);
1080 buffer->saved_flags = BOL;
1081 if (! pfile->state.in_directive)
45b966db 1082 {
4ed5bcfb
NB
1083 if (pfile->state.parsing_args == 2)
1084 buffer->saved_flags |= PREV_WHITE;
/* Unless tokens must be kept, go back to the first slot of the base
   run and lex into that.  */
bdcbe496
NB
1085 if (!pfile->keep_tokens)
1086 {
1087 pfile->cur_run = &pfile->base_run;
1088 result = pfile->base_run.base;
1089 pfile->cur_token = result + 1;
1090 }
1091 goto fresh_line;
45b966db 1092 }
5fddcffc
NB
1093 result->type = CPP_EOF;
1094 break;
46d07497 1095
0d9f234d
NB
1096 case '?':
1097 case '\\':
1098 /* These could start an escaped newline, or '?' a trigraph. Let
1099 skip_escaped_newlines do all the work. */
1100 {
67821e3a 1101 unsigned int line = pfile->line;
0d9f234d 1102
29401c30 1103 c = skip_escaped_newlines (pfile, c);
67821e3a 1104 if (line != pfile->line)
0d9f234d
NB
1105 /* We had at least one escaped newline of some sort, and the
1106 next character is in buffer->read_ahead. Update the
1107 token's line and column. */
5fddcffc 1108 goto update_tokens_line;
0d9f234d
NB
1109
1110 /* We are either the original '?' or '\\', or a trigraph. */
1111 result->type = CPP_QUERY;
1112 buffer->read_ahead = EOF;
1113 if (c == '\\')
12c4f523 1114 goto random_char;
0d9f234d 1115 else if (c != '?')
5fddcffc 1116 goto trigraph;
0d9f234d
NB
1117 }
1118 break;
46d07497 1119
0d9f234d
NB
1120 case '0': case '1': case '2': case '3': case '4':
1121 case '5': case '6': case '7': case '8': case '9':
1122 result->type = CPP_NUMBER;
93c80368 1123 parse_number (pfile, &result->val.str, c, 0);
0d9f234d 1124 break;
46d07497 1125
0d9f234d
NB
1126 case '$':
1127 if (!CPP_OPTION (pfile, dollars_in_ident))
1128 goto random_char;
ec5c56db 1129 /* Fall through... */
0d9f234d
NB
1130
1131 case '_':
1132 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1133 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1134 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1135 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1136 case 'y': case 'z':
1137 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1138 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1139 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1140 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1141 case 'Y': case 'Z':
1142 result->type = CPP_NAME;
2c3fcba6 1143 result->val.node = parse_identifier (pfile);
0d9f234d
NB
1144
1145 /* 'L' may introduce wide characters or strings. */
93c80368 1146 if (result->val.node == pfile->spec_nodes.n_L)
0d9f234d 1147 {
/* Look at the character after the identifier; it is consumed only if
   it is a quote.
   NOTE(review): when C is taken from buffer->read_ahead rather than
   from *buffer->cur, buffer->cur has presumably already moved past it,
   in which case the increment below would skip one extra character --
   confirm against how parse_identifier leaves read_ahead.  */
2c3fcba6
ZW
1148 c = buffer->read_ahead;
1149 if (c == EOF && buffer->cur < buffer->rlimit)
1150 c = *buffer->cur;
0d9f234d 1151 if (c == '\'' || c == '"')
ba89d661 1152 {
2c3fcba6 1153 buffer->cur++;
0d9f234d
NB
1154 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1155 goto make_string;
ba89d661 1156 }
0d9f234d
NB
1157 }
1158 /* Convert named operators to their proper types. */
93c80368 1159 else if (result->val.node->flags & NODE_OPERATOR)
0d9f234d
NB
1160 {
1161 result->flags |= NAMED_OP;
93c80368 1162 result->type = result->val.node->value.operator;
0d9f234d
NB
1163 }
1164 break;
1165
1166 case '\'':
1167 case '"':
1168 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1169 make_string:
1170 parse_string (pfile, result, c);
1171 break;
041c3194 1172
0d9f234d 1173 case '/':
1c6d33ef
NB
1174 /* A potential block or line comment. */
1175 comment_start = buffer->cur;
0d9f234d 1176 result->type = CPP_DIV;
29401c30 1177 c = get_effective_char (pfile);
0d9f234d
NB
1178 if (c == '=')
1179 ACCEPT_CHAR (CPP_DIV_EQ);
/* Neither '/' nor '*' follows, so this is not a comment.  */
1c6d33ef
NB
1180 if (c != '/' && c != '*')
1181 break;
e61fc951 1182
1c6d33ef
NB
1183 if (c == '*')
1184 {
0d9f234d 1185 if (skip_block_comment (pfile))
67821e3a 1186 cpp_error (pfile, "unterminated comment");
0d9f234d 1187 }
1c6d33ef 1188 else
0d9f234d 1189 {
1c6d33ef
NB
1190 if (!CPP_OPTION (pfile, cplusplus_comments)
1191 && !CPP_IN_SYSTEM_HEADER (pfile))
1192 break;
1193
bdb05a7b
NB
1194 /* Warn about comments only if pedantically GNUC89, and not
1195 in system headers. */
1196 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 1197 && ! buffer->warned_cplusplus_comments)
041c3194 1198 {
1c6d33ef
NB
1199 cpp_pedwarn (pfile,
1200 "C++ style comments are not allowed in ISO C89");
1201 cpp_pedwarn (pfile,
1202 "(this will be reported only once per input file)");
1203 buffer->warned_cplusplus_comments = 1;
1204 }
0d9f234d 1205
a94c1199 1206 /* Skip_line_comment updates buffer->read_ahead. */
01ef6563 1207 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
50410426 1208 cpp_warning (pfile, "multi-line comment");
1c6d33ef 1209 }
0d9f234d 1210
1c6d33ef
NB
1211 /* Skipping the comment has updated buffer->read_ahead. */
1212 if (!pfile->state.save_comments)
1213 {
1214 result->flags |= PREV_WHITE;
5fddcffc 1215 goto update_tokens_line;
0d9f234d 1216 }
1c6d33ef
NB
1217
1218 /* Save the comment as a token in its own right. */
1219 save_comment (pfile, result, comment_start);
29b10746 1220 /* Don't do MI optimisation. */
bdcbe496 1221 break;
0d9f234d
NB
1222
1223 case '<':
/* With angled_headers set, the text up to '>' is lexed as a single
   CPP_HEADER_NAME token through the string path.  */
1224 if (pfile->state.angled_headers)
1225 {
1226 result->type = CPP_HEADER_NAME;
1227 c = '>'; /* terminator. */
1228 goto make_string;
1229 }
45b966db 1230
0d9f234d 1231 result->type = CPP_LESS;
29401c30 1232 c = get_effective_char (pfile);
0d9f234d
NB
1233 if (c == '=')
1234 ACCEPT_CHAR (CPP_LESS_EQ);
1235 else if (c == '<')
1236 {
1237 ACCEPT_CHAR (CPP_LSHIFT);
29401c30 1238 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1239 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1240 }
1241 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1242 {
1243 ACCEPT_CHAR (CPP_MIN);
29401c30 1244 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1245 ACCEPT_CHAR (CPP_MIN_EQ);
1246 }
1247 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1248 {
1249 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1250 result->flags |= DIGRAPH;
1251 }
1252 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1253 {
1254 ACCEPT_CHAR (CPP_OPEN_BRACE);
1255 result->flags |= DIGRAPH;
1256 }
1257 break;
1258
1259 case '>':
1260 result->type = CPP_GREATER;
29401c30 1261 c = get_effective_char (pfile);
0d9f234d
NB
1262 if (c == '=')
1263 ACCEPT_CHAR (CPP_GREATER_EQ);
1264 else if (c == '>')
1265 {
1266 ACCEPT_CHAR (CPP_RSHIFT);
29401c30 1267 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1268 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1269 }
1270 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1271 {
1272 ACCEPT_CHAR (CPP_MAX);
29401c30 1273 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1274 ACCEPT_CHAR (CPP_MAX_EQ);
1275 }
1276 break;
1277
cbcff6df 1278 case '%':
29401c30 1279 lex_percent (pfile, result);
0d9f234d
NB
1280 break;
1281
cbcff6df
NB
1282 case '.':
1283 lex_dot (pfile, result);
0d9f234d 1284 break;
45b966db 1285
0d9f234d
NB
1286 case '+':
1287 result->type = CPP_PLUS;
29401c30 1288 c = get_effective_char (pfile);
0d9f234d
NB
1289 if (c == '=')
1290 ACCEPT_CHAR (CPP_PLUS_EQ);
1291 else if (c == '+')
1292 ACCEPT_CHAR (CPP_PLUS_PLUS);
1293 break;
04e3ec78 1294
0d9f234d
NB
1295 case '-':
1296 result->type = CPP_MINUS;
29401c30 1297 c = get_effective_char (pfile);
0d9f234d
NB
1298 if (c == '>')
1299 {
1300 ACCEPT_CHAR (CPP_DEREF);
1301 if (CPP_OPTION (pfile, cplusplus)
29401c30 1302 && get_effective_char (pfile) == '*')
0d9f234d
NB
1303 ACCEPT_CHAR (CPP_DEREF_STAR);
1304 }
1305 else if (c == '=')
1306 ACCEPT_CHAR (CPP_MINUS_EQ);
1307 else if (c == '-')
1308 ACCEPT_CHAR (CPP_MINUS_MINUS);
1309 break;
45b966db 1310
0d9f234d
NB
1311 case '*':
1312 result->type = CPP_MULT;
29401c30 1313 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1314 ACCEPT_CHAR (CPP_MULT_EQ);
1315 break;
04e3ec78 1316
0d9f234d
NB
1317 case '=':
1318 result->type = CPP_EQ;
29401c30 1319 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1320 ACCEPT_CHAR (CPP_EQ_EQ);
1321 break;
f8f769ea 1322
0d9f234d
NB
1323 case '!':
1324 result->type = CPP_NOT;
29401c30 1325 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1326 ACCEPT_CHAR (CPP_NOT_EQ);
1327 break;
45b966db 1328
0d9f234d
NB
1329 case '&':
1330 result->type = CPP_AND;
29401c30 1331 c = get_effective_char (pfile);
0d9f234d
NB
1332 if (c == '=')
1333 ACCEPT_CHAR (CPP_AND_EQ);
1334 else if (c == '&')
1335 ACCEPT_CHAR (CPP_AND_AND);
1336 break;
1337
1338 case '#':
a949941c 1339 result->type = CPP_HASH;
5fddcffc
NB
1340 if (get_effective_char (pfile) == '#')
1341 ACCEPT_CHAR (CPP_PASTE);
0d9f234d 1342 break;
45b966db 1343
0d9f234d
NB
1344 case '|':
1345 result->type = CPP_OR;
29401c30 1346 c = get_effective_char (pfile);
0d9f234d
NB
1347 if (c == '=')
1348 ACCEPT_CHAR (CPP_OR_EQ);
1349 else if (c == '|')
1350 ACCEPT_CHAR (CPP_OR_OR);
1351 break;
45b966db 1352
0d9f234d
NB
1353 case '^':
1354 result->type = CPP_XOR;
29401c30 1355 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1356 ACCEPT_CHAR (CPP_XOR_EQ);
1357 break;
45b966db 1358
0d9f234d
NB
1359 case ':':
1360 result->type = CPP_COLON;
29401c30 1361 c = get_effective_char (pfile);
0d9f234d
NB
1362 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1363 ACCEPT_CHAR (CPP_SCOPE);
1364 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1365 {
1366 result->flags |= DIGRAPH;
1367 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1368 }
1369 break;
45b966db 1370
0d9f234d
NB
1371 case '~': result->type = CPP_COMPL; break;
1372 case ',': result->type = CPP_COMMA; break;
1373 case '(': result->type = CPP_OPEN_PAREN; break;
1374 case ')': result->type = CPP_CLOSE_PAREN; break;
1375 case '[': result->type = CPP_OPEN_SQUARE; break;
1376 case ']': result->type = CPP_CLOSE_SQUARE; break;
1377 case '{': result->type = CPP_OPEN_BRACE; break;
1378 case '}': result->type = CPP_CLOSE_BRACE; break;
1379 case ';': result->type = CPP_SEMICOLON; break;
1380
cc937581
ZW
1381 /* @ is a punctuator in Objective C. */
1382 case '@': result->type = CPP_ATSIGN; break;
0d9f234d
NB
1383
/* Anything not recognised above becomes a CPP_OTHER token that
   carries the character itself.  */
1384 random_char:
1385 default:
1386 result->type = CPP_OTHER;
6c53ebff 1387 result->val.c = c;
0d9f234d
NB
1388 break;
1389 }
bdcbe496
NB
1390
1391 return result;
0d9f234d
NB
1392}
1393
93c80368
NB
1394/* An upper bound on the number of bytes needed to spell a token,
1395 including preceding whitespace. */
1396unsigned int
1397cpp_token_len (token)
1398 const cpp_token *token;
0d9f234d 1399{
93c80368 1400 unsigned int len;
6d2c2047 1401
93c80368 1402 switch (TOKEN_SPELL (token))
041c3194 1403 {
a28c5035
NB
1404 default: len = 0; break;
1405 case SPELL_STRING: len = token->val.str.len; break;
1406 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
041c3194 1407 }
93c80368
NB
1408 /* 1 for whitespace, 4 for comment delimeters. */
1409 return len + 5;
6d2c2047
ZW
1410}
1411
041c3194 1412/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1413 already contain the enough space to hold the token's spelling.
1414 Returns a pointer to the character after the last character
1415 written. */
93c80368
NB
1416unsigned char *
1417cpp_spell_token (pfile, token, buffer)
041c3194
ZW
1418 cpp_reader *pfile; /* Would be nice to be rid of this... */
1419 const cpp_token *token;
1420 unsigned char *buffer;
1421{
96be6998 1422 switch (TOKEN_SPELL (token))
041c3194
ZW
1423 {
1424 case SPELL_OPERATOR:
1425 {
1426 const unsigned char *spelling;
1427 unsigned char c;
d6d5f795 1428
041c3194 1429 if (token->flags & DIGRAPH)
37b8524c
JDA
1430 spelling
1431 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
92936ecf
ZW
1432 else if (token->flags & NAMED_OP)
1433 goto spell_ident;
041c3194 1434 else
96be6998 1435 spelling = TOKEN_NAME (token);
041c3194
ZW
1436
1437 while ((c = *spelling++) != '\0')
1438 *buffer++ = c;
1439 }
1440 break;
d6d5f795 1441
041c3194 1442 case SPELL_IDENT:
92936ecf 1443 spell_ident:
a28c5035
NB
1444 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1445 buffer += NODE_LEN (token->val.node);
041c3194 1446 break;
d6d5f795 1447
041c3194
ZW
1448 case SPELL_STRING:
1449 {
ba89d661
ZW
1450 int left, right, tag;
1451 switch (token->type)
1452 {
1453 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1454 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
ba89d661
ZW
1455 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1456 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1457 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1458 default: left = '\0'; right = '\0'; tag = '\0'; break;
1459 }
1460 if (tag) *buffer++ = tag;
1461 if (left) *buffer++ = left;
bfb9dc7f
ZW
1462 memcpy (buffer, token->val.str.text, token->val.str.len);
1463 buffer += token->val.str.len;
ba89d661 1464 if (right) *buffer++ = right;
041c3194
ZW
1465 }
1466 break;
d6d5f795 1467
041c3194 1468 case SPELL_CHAR:
6c53ebff 1469 *buffer++ = token->val.c;
041c3194 1470 break;
d6d5f795 1471
041c3194 1472 case SPELL_NONE:
96be6998 1473 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1474 break;
1475 }
d6d5f795 1476
041c3194
ZW
1477 return buffer;
1478}
d6d5f795 1479
93c80368
NB
1480/* Returns a token as a null-terminated string. The string is
1481 temporary, and automatically freed later. Useful for diagnostics. */
1482unsigned char *
1483cpp_token_as_text (pfile, token)
c5a04734 1484 cpp_reader *pfile;
041c3194 1485 const cpp_token *token;
c5a04734 1486{
93c80368 1487 unsigned int len = cpp_token_len (token);
49fe13f6 1488 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
c5a04734 1489
93c80368
NB
1490 end = cpp_spell_token (pfile, token, start);
1491 end[0] = '\0';
c5a04734 1492
93c80368
NB
1493 return start;
1494}
c5a04734 1495
93c80368
NB
1496/* Used by C front ends. Should really move to using cpp_token_as_text. */
1497const char *
1498cpp_type2name (type)
1499 enum cpp_ttype type;
1500{
1501 return (const char *) token_spellings[type].name;
1502}
c5a04734 1503
4ed5bcfb
NB
1504/* Writes the spelling of token to FP, without any preceding space.
1505 Separated from cpp_spell_token for efficiency - to avoid stdio
1506 double-buffering. */
93c80368
NB
1507void
1508cpp_output_token (token, fp)
1509 const cpp_token *token;
1510 FILE *fp;
1511{
93c80368 1512 switch (TOKEN_SPELL (token))
c5a04734 1513 {
93c80368
NB
1514 case SPELL_OPERATOR:
1515 {
1516 const unsigned char *spelling;
c5a04734 1517
93c80368 1518 if (token->flags & DIGRAPH)
37b8524c
JDA
1519 spelling
1520 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
93c80368
NB
1521 else if (token->flags & NAMED_OP)
1522 goto spell_ident;
1523 else
1524 spelling = TOKEN_NAME (token);
041c3194 1525
93c80368
NB
1526 ufputs (spelling, fp);
1527 }
1528 break;
041c3194 1529
93c80368
NB
1530 spell_ident:
1531 case SPELL_IDENT:
a28c5035 1532 ufputs (NODE_NAME (token->val.node), fp);
93c80368 1533 break;
041c3194 1534
93c80368
NB
1535 case SPELL_STRING:
1536 {
1537 int left, right, tag;
1538 switch (token->type)
1539 {
1540 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1541 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
93c80368
NB
1542 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1543 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1544 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1545 default: left = '\0'; right = '\0'; tag = '\0'; break;
1546 }
1547 if (tag) putc (tag, fp);
1548 if (left) putc (left, fp);
1549 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1550 if (right) putc (right, fp);
1551 }
1552 break;
c5a04734 1553
93c80368 1554 case SPELL_CHAR:
6c53ebff 1555 putc (token->val.c, fp);
93c80368 1556 break;
c5a04734 1557
93c80368
NB
1558 case SPELL_NONE:
1559 /* An error, most probably. */
1560 break;
041c3194 1561 }
c5a04734
ZW
1562}
1563
93c80368
NB
1564/* Compare two tokens. */
1565int
1566_cpp_equiv_tokens (a, b)
1567 const cpp_token *a, *b;
c5a04734 1568{
93c80368
NB
1569 if (a->type == b->type && a->flags == b->flags)
1570 switch (TOKEN_SPELL (a))
1571 {
1572 default: /* Keep compiler happy. */
1573 case SPELL_OPERATOR:
1574 return 1;
1575 case SPELL_CHAR:
6c53ebff 1576 return a->val.c == b->val.c; /* Character. */
93c80368 1577 case SPELL_NONE:
56051c0a 1578 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
93c80368
NB
1579 case SPELL_IDENT:
1580 return a->val.node == b->val.node;
1581 case SPELL_STRING:
1582 return (a->val.str.len == b->val.str.len
1583 && !memcmp (a->val.str.text, b->val.str.text,
1584 a->val.str.len));
1585 }
c5a04734 1586
041c3194
ZW
1587 return 0;
1588}
1589
041c3194
ZW
1590/* Determine whether two tokens can be pasted together, and if so,
1591 what the resulting token is. Returns CPP_EOF if the tokens cannot
1592 be pasted, or the appropriate type for the merged token if they
1593 can. */
7de4d004 1594enum cpp_ttype
93c80368 1595cpp_can_paste (pfile, token1, token2, digraph)
041c3194
ZW
1596 cpp_reader * pfile;
1597 const cpp_token *token1, *token2;
1598 int* digraph;
c5a04734 1599{
041c3194
ZW
1600 enum cpp_ttype a = token1->type, b = token2->type;
1601 int cxx = CPP_OPTION (pfile, cplusplus);
c5a04734 1602
92936ecf
ZW
1603 /* Treat named operators as if they were ordinary NAMEs. */
1604 if (token1->flags & NAMED_OP)
1605 a = CPP_NAME;
1606 if (token2->flags & NAMED_OP)
1607 b = CPP_NAME;
1608
37b8524c
JDA
1609 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1610 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
c5a04734 1611
041c3194 1612 switch (a)
c5a04734 1613 {
041c3194
ZW
1614 case CPP_GREATER:
1615 if (b == a) return CPP_RSHIFT;
1616 if (b == CPP_QUERY && cxx) return CPP_MAX;
1617 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1618 break;
1619 case CPP_LESS:
1620 if (b == a) return CPP_LSHIFT;
1621 if (b == CPP_QUERY && cxx) return CPP_MIN;
1622 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
9b55f29a
NB
1623 if (CPP_OPTION (pfile, digraphs))
1624 {
1625 if (b == CPP_COLON)
1626 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1627 if (b == CPP_MOD)
1628 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1629 }
041c3194 1630 break;
c5a04734 1631
041c3194
ZW
1632 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1633 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1634 case CPP_OR: if (b == a) return CPP_OR_OR; break;
c5a04734 1635
041c3194
ZW
1636 case CPP_MINUS:
1637 if (b == a) return CPP_MINUS_MINUS;
1638 if (b == CPP_GREATER) return CPP_DEREF;
1639 break;
1640 case CPP_COLON:
1641 if (b == a && cxx) return CPP_SCOPE;
9b55f29a 1642 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
041c3194
ZW
1643 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1644 break;
1645
1646 case CPP_MOD:
9b55f29a
NB
1647 if (CPP_OPTION (pfile, digraphs))
1648 {
1649 if (b == CPP_GREATER)
1650 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1651 if (b == CPP_COLON)
1652 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1653 }
041c3194
ZW
1654 break;
1655 case CPP_DEREF:
1656 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1657 break;
1658 case CPP_DOT:
1659 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1660 if (b == CPP_NUMBER) return CPP_NUMBER;
1661 break;
1662
1663 case CPP_HASH:
1664 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1665 /* %:%: digraph */
1666 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1667 break;
1668
1669 case CPP_NAME:
1670 if (b == CPP_NAME) return CPP_NAME;
1671 if (b == CPP_NUMBER
93c80368 1672 && name_p (pfile, &token2->val.str)) return CPP_NAME;
041c3194 1673 if (b == CPP_CHAR
93c80368 1674 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
041c3194 1675 if (b == CPP_STRING
93c80368 1676 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
041c3194
ZW
1677 break;
1678
1679 case CPP_NUMBER:
1680 if (b == CPP_NUMBER) return CPP_NUMBER;
1681 if (b == CPP_NAME) return CPP_NUMBER;
1682 if (b == CPP_DOT) return CPP_NUMBER;
1683 /* Numbers cannot have length zero, so this is safe. */
1684 if ((b == CPP_PLUS || b == CPP_MINUS)
bfb9dc7f 1685 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
041c3194
ZW
1686 return CPP_NUMBER;
1687 break;
1688
1689 default:
1690 break;
c5a04734
ZW
1691 }
1692
041c3194
ZW
1693 return CPP_EOF;
1694}
1695
93c80368
NB
1696/* Returns nonzero if a space should be inserted to avoid an
1697 accidental token paste for output. For simplicity, it is
1698 conservative, and occasionally advises a space where one is not
1699 needed, e.g. "." and ".2". */
041c3194 1700
93c80368
NB
1701int
1702cpp_avoid_paste (pfile, token1, token2)
c5a04734 1703 cpp_reader *pfile;
93c80368 1704 const cpp_token *token1, *token2;
c5a04734 1705{
93c80368
NB
1706 enum cpp_ttype a = token1->type, b = token2->type;
1707 cppchar_t c;
c5a04734 1708
93c80368
NB
1709 if (token1->flags & NAMED_OP)
1710 a = CPP_NAME;
1711 if (token2->flags & NAMED_OP)
1712 b = CPP_NAME;
c5a04734 1713
93c80368
NB
1714 c = EOF;
1715 if (token2->flags & DIGRAPH)
37b8524c 1716 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
1717 else if (token_spellings[b].category == SPELL_OPERATOR)
1718 c = token_spellings[b].name[0];
c5a04734 1719
93c80368 1720 /* Quickly get everything that can paste with an '='. */
37b8524c 1721 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 1722 return 1;
c5a04734 1723
93c80368 1724 switch (a)
c5a04734 1725 {
93c80368
NB
1726 case CPP_GREATER: return c == '>' || c == '?';
1727 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1728 case CPP_PLUS: return c == '+';
1729 case CPP_MINUS: return c == '-' || c == '>';
1730 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1731 case CPP_MOD: return c == ':' || c == '>';
1732 case CPP_AND: return c == '&';
1733 case CPP_OR: return c == '|';
1734 case CPP_COLON: return c == ':' || c == '>';
1735 case CPP_DEREF: return c == '*';
26ec42ee 1736 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
1737 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1738 case CPP_NAME: return ((b == CPP_NUMBER
1739 && name_p (pfile, &token2->val.str))
1740 || b == CPP_NAME
1741 || b == CPP_CHAR || b == CPP_STRING); /* L */
1742 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1743 || c == '.' || c == '+' || c == '-');
1744 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
6c53ebff 1745 && token1->val.c == '@'
93c80368
NB
1746 && (b == CPP_NAME || b == CPP_STRING));
1747 default: break;
c5a04734 1748 }
c5a04734 1749
417f3e3a 1750 return 0;
c5a04734
ZW
1751}
1752
93c80368 1753/* Output all the remaining tokens on the current line, and a newline
4ed5bcfb
NB
1754 character, to FP. Leading whitespace is removed. If there are
1755 macros, special token padding is not performed. */
c5a04734 1756void
93c80368 1757cpp_output_line (pfile, fp)
c5a04734 1758 cpp_reader *pfile;
93c80368 1759 FILE *fp;
c5a04734 1760{
4ed5bcfb 1761 const cpp_token *token;
96be6998 1762
4ed5bcfb
NB
1763 token = cpp_get_token (pfile);
1764 while (token->type != CPP_EOF)
96be6998 1765 {
4ed5bcfb
NB
1766 cpp_output_token (token, fp);
1767 token = cpp_get_token (pfile);
1768 if (token->flags & PREV_WHITE)
1769 putc (' ', fp);
96be6998
ZW
1770 }
1771
93c80368 1772 putc ('\n', fp);
041c3194 1773}
c5a04734 1774
c8a96070
NB
/* Return the numeric value, 0 to 15, of the hexadecimal digit C.
   Letters are accepted in either case.  Aborts if C is not a
   hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'A' && c <= 'F')
    return 10 + (c - 'A');
  if (c >= 'a' && c <= 'f')
    return 10 + (c - 'a');

  abort ();
}
1788
62729350
NB
1789/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1790 failure if cpplib is not parsing C++ or C99. Such failure is
1791 silent, and no variables are updated. Otherwise returns 0, and
1792 warns if -Wtraditional.
c8a96070
NB
1793
1794 [lex.charset]: The character designated by the universal character
1795 name \UNNNNNNNN is that character whose character short name in
1796 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1797 universal character name \uNNNN is that character whose character
1798 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1799 for a universal character name is less than 0x20 or in the range
1800 0x7F-0x9F (inclusive), or if the universal character name
1801 designates a character in the basic source character set, then the
1802 program is ill-formed.
1803
1804 We assume that wchar_t is Unicode, so we don't need to do any
62729350 1805 mapping. Is this ever wrong?
c8a96070 1806
62729350
NB
1807 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1808 LIMIT is the end of the string or charconst. PSTR is updated to
1809 point after the UCS on return, and the UCS is written into PC. */
1810
1811static int
1812maybe_read_ucs (pfile, pstr, limit, pc)
c8a96070
NB
1813 cpp_reader *pfile;
1814 const unsigned char **pstr;
1815 const unsigned char *limit;
62729350 1816 unsigned int *pc;
c8a96070
NB
1817{
1818 const unsigned char *p = *pstr;
62729350
NB
1819 unsigned int code = 0;
1820 unsigned int c = *pc, length;
1821
1822 /* Only attempt to interpret a UCS for C++ and C99. */
1823 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1824 return 1;
c8a96070 1825
62729350
NB
1826 if (CPP_WTRADITIONAL (pfile))
1827 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
c8a96070 1828
f8710242
NB
1829 length = (c == 'u' ? 4: 8);
1830
1831 if ((size_t) (limit - p) < length)
1832 {
1833 cpp_error (pfile, "incomplete universal-character-name");
1834 /* Skip to the end to avoid more diagnostics. */
1835 p = limit;
1836 }
1837 else
1838 {
1839 for (; length; length--, p++)
c8a96070 1840 {
f8710242
NB
1841 c = *p;
1842 if (ISXDIGIT (c))
1843 code = (code << 4) + hex_digit_value (c);
1844 else
1845 {
1846 cpp_error (pfile,
1847 "non-hex digit '%c' in universal-character-name", c);
1848 /* We shouldn't skip in case there are multibyte chars. */
1849 break;
1850 }
c8a96070 1851 }
c8a96070
NB
1852 }
1853
1854#ifdef TARGET_EBCDIC
1855 cpp_error (pfile, "universal-character-name on EBCDIC target");
1856 code = 0x3f; /* EBCDIC invalid character */
1857#else
f8710242
NB
1858 /* True extended characters are OK. */
1859 if (code >= 0xa0
1860 && !(code & 0x80000000)
1861 && !(code >= 0xD800 && code <= 0xDFFF))
1862 ;
1863 /* The standard permits $, @ and ` to be specified as UCNs. We use
1864 hex escapes so that this also works with EBCDIC hosts. */
1865 else if (code == 0x24 || code == 0x40 || code == 0x60)
1866 ;
1867 /* Don't give another error if one occurred above. */
1868 else if (length == 0)
1869 cpp_error (pfile, "universal-character-name out of range");
c8a96070
NB
1870#endif
1871
1872 *pstr = p;
62729350
NB
1873 *pc = code;
1874 return 0;
c8a96070
NB
1875}
1876
1877/* Interpret an escape sequence, and return its value. PSTR points to
1878 the input pointer, which is just after the backslash. LIMIT is how
62729350
NB
1879 much text we have. MASK is a bitmask for the precision for the
1880 destination type (char or wchar_t). TRADITIONAL, if true, does not
1881 interpret escapes that did not exist in traditional C.
c8a96070 1882
62729350
NB
1883 Handles all relevant diagnostics. */
1884
1885unsigned int
1886cpp_parse_escape (pfile, pstr, limit, mask, traditional)
c8a96070
NB
1887 cpp_reader *pfile;
1888 const unsigned char **pstr;
1889 const unsigned char *limit;
62729350 1890 unsigned HOST_WIDE_INT mask;
c8a96070
NB
1891 int traditional;
1892{
1893 int unknown = 0;
1894 const unsigned char *str = *pstr;
1895 unsigned int c = *str++;
1896
1897 switch (c)
1898 {
1899 case '\\': case '\'': case '"': case '?': break;
1900 case 'b': c = TARGET_BS; break;
1901 case 'f': c = TARGET_FF; break;
1902 case 'n': c = TARGET_NEWLINE; break;
1903 case 'r': c = TARGET_CR; break;
1904 case 't': c = TARGET_TAB; break;
1905 case 'v': c = TARGET_VT; break;
1906
1907 case '(': case '{': case '[': case '%':
1908 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1909 '\%' is used to prevent SCCS from getting confused. */
1910 unknown = CPP_PEDANTIC (pfile);
1911 break;
1912
1913 case 'a':
1914 if (CPP_WTRADITIONAL (pfile))
1915 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1916 if (!traditional)
1917 c = TARGET_BELL;
1918 break;
1919
1920 case 'e': case 'E':
1921 if (CPP_PEDANTIC (pfile))
1922 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1923 c = TARGET_ESC;
1924 break;
1925
c8a96070 1926 case 'u': case 'U':
62729350 1927 unknown = maybe_read_ucs (pfile, &str, limit, &c);
c8a96070
NB
1928 break;
1929
1930 case 'x':
1931 if (CPP_WTRADITIONAL (pfile))
1932 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1933
1934 if (!traditional)
1935 {
1936 unsigned int i = 0, overflow = 0;
1937 int digits_found = 0;
1938
1939 while (str < limit)
1940 {
1941 c = *str;
1942 if (! ISXDIGIT (c))
1943 break;
1944 str++;
1945 overflow |= i ^ (i << 4 >> 4);
1946 i = (i << 4) + hex_digit_value (c);
1947 digits_found = 1;
1948 }
1949
1950 if (!digits_found)
1951 cpp_error (pfile, "\\x used with no following hex digits");
1952
1953 if (overflow | (i != (i & mask)))
1954 {
1955 cpp_pedwarn (pfile, "hex escape sequence out of range");
1956 i &= mask;
1957 }
1958 c = i;
1959 }
1960 break;
1961
1962 case '0': case '1': case '2': case '3':
1963 case '4': case '5': case '6': case '7':
1964 {
1965 unsigned int i = c - '0';
1966 int count = 0;
1967
1968 while (str < limit && ++count < 3)
1969 {
1970 c = *str;
1971 if (c < '0' || c > '7')
1972 break;
1973 str++;
1974 i = (i << 3) + c - '0';
1975 }
1976
1977 if (i != (i & mask))
1978 {
1979 cpp_pedwarn (pfile, "octal escape sequence out of range");
1980 i &= mask;
1981 }
1982 c = i;
1983 }
1984 break;
1985
1986 default:
1987 unknown = 1;
1988 break;
1989 }
1990
1991 if (unknown)
1992 {
1993 if (ISGRAPH (c))
1994 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1995 else
1996 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1997 }
1998
62729350
NB
1999 if (c > mask)
2000 cpp_pedwarn (pfile, "escape sequence out of range for character");
2001
c8a96070
NB
2002 *pstr = str;
2003 return c;
2004}
2005
2006#ifndef MAX_CHAR_TYPE_SIZE
2007#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
2008#endif
2009
2010#ifndef MAX_WCHAR_TYPE_SIZE
2011#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
2012#endif
2013
2014/* Interpret a (possibly wide) character constant in TOKEN.
2015 WARN_MULTI warns about multi-character charconsts, if not
2016 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
2017 that did not exist in traditional C. PCHARS_SEEN points to a
2018 variable that is filled in with the number of characters seen. */
2019HOST_WIDE_INT
2020cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
2021 cpp_reader *pfile;
2022 const cpp_token *token;
2023 int warn_multi;
2024 int traditional;
2025 unsigned int *pchars_seen;
2026{
2027 const unsigned char *str = token->val.str.text;
2028 const unsigned char *limit = str + token->val.str.len;
2029 unsigned int chars_seen = 0;
2030 unsigned int width, max_chars, c;
2a967f3d
NB
2031 unsigned HOST_WIDE_INT mask;
2032 HOST_WIDE_INT result = 0;
c8a96070
NB
2033
2034#ifdef MULTIBYTE_CHARS
2035 (void) local_mbtowc (NULL, NULL, 0);
2036#endif
2037
2038 /* Width in bits. */
2039 if (token->type == CPP_CHAR)
2040 width = MAX_CHAR_TYPE_SIZE;
2041 else
2042 width = MAX_WCHAR_TYPE_SIZE;
2043
2044 if (width < HOST_BITS_PER_WIDE_INT)
2045 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
2046 else
2047 mask = ~0;
2048 max_chars = HOST_BITS_PER_WIDE_INT / width;
2049
2050 while (str < limit)
2051 {
2052#ifdef MULTIBYTE_CHARS
2053 wchar_t wc;
2054 int char_len;
2055
2056 char_len = local_mbtowc (&wc, str, limit - str);
2057 if (char_len == -1)
2058 {
2059 cpp_warning (pfile, "ignoring invalid multibyte character");
2060 c = *str++;
2061 }
2062 else
2063 {
2064 str += char_len;
2065 c = wc;
2066 }
2067#else
2068 c = *str++;
2069#endif
2070
2071 if (c == '\\')
62729350 2072 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
c8a96070
NB
2073
2074#ifdef MAP_CHARACTER
2075 if (ISPRINT (c))
2076 c = MAP_CHARACTER (c);
2077#endif
2078
2079 /* Merge character into result; ignore excess chars. */
2080 if (++chars_seen <= max_chars)
2081 {
2082 if (width < HOST_BITS_PER_WIDE_INT)
2083 result = (result << width) | (c & mask);
2084 else
2085 result = c;
2086 }
2087 }
2088
2089 if (chars_seen == 0)
2090 cpp_error (pfile, "empty character constant");
2091 else if (chars_seen > max_chars)
2092 {
2093 chars_seen = max_chars;
f8710242 2094 cpp_warning (pfile, "character constant too long");
c8a96070
NB
2095 }
2096 else if (chars_seen > 1 && !traditional && warn_multi)
2097 cpp_warning (pfile, "multi-character character constant");
2098
2099 /* If char type is signed, sign-extend the constant. The
2100 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
2101 if (token->type == CPP_CHAR && chars_seen)
2102 {
2103 unsigned int nbits = chars_seen * width;
2104 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2105
2106 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2107 || ((result >> (nbits - 1)) & 1) == 0)
2108 result &= mask;
2109 else
2110 result |= ~mask;
2111 }
2112
2113 *pchars_seen = chars_seen;
2114 return result;
2115}
2116
93c80368 2117/* Memory pools. */
417f3e3a 2118
/* struct dummy is used only to measure alignment: the offset of its
   union member U, which follows a single char, is taken as the
   default alignment for pool chunks (see DEFAULT_ALIGNMENT).  The
   union holds a double and a pointer so the offset is suitable for
   either.  */
93c80368 2119struct dummy
417f3e3a 2120{
93c80368
NB
2121 char c;
2122 union
2123 {
2124 double d;
2125 int *p;
2126 } u;
2127};
417f3e3a 2128
/* Offset of U within struct dummy; used as the alignment of new
   chunks and as the pool alignment when the caller passes zero.  */
93c80368 2129#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
417f3e3a 2130
93c80368
NB
2131static int
2132chunk_suitable (pool, chunk, size)
2133 cpp_pool *pool;
2134 cpp_chunk *chunk;
2135 unsigned int size;
2136{
2137 /* Being at least twice SIZE means we can use memcpy in
2138 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2139 anyway. */
2140 return (chunk && pool->locked != chunk
2141 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
041c3194 2142}
c5a04734 2143
93c80368
NB
2144/* Returns the end of the new pool. PTR points to a char in the old
2145 pool, and is updated to point to the same char in the new pool. */
2146unsigned char *
2147_cpp_next_chunk (pool, len, ptr)
2148 cpp_pool *pool;
2149 unsigned int len;
2150 unsigned char **ptr;
041c3194 2151{
93c80368 2152 cpp_chunk *chunk = pool->cur->next;
c5a04734 2153
93c80368
NB
2154 /* LEN is the minimum size we want in the new pool. */
2155 len += POOL_ROOM (pool);
2156 if (! chunk_suitable (pool, chunk, len))
041c3194 2157 {
93c80368 2158 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
c5a04734 2159
93c80368
NB
2160 chunk->next = pool->cur->next;
2161 pool->cur->next = chunk;
c5a04734
ZW
2162 }
2163
93c80368
NB
2164 /* Update the pointer before changing chunk's front. */
2165 if (ptr)
2166 *ptr += chunk->base - POOL_FRONT (pool);
041c3194 2167
93c80368
NB
2168 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2169 chunk->front = chunk->base;
041c3194 2170
93c80368
NB
2171 pool->cur = chunk;
2172 return POOL_LIMIT (pool);
c5a04734
ZW
2173}
2174
93c80368
NB
2175static cpp_chunk *
2176new_chunk (size)
2177 unsigned int size;
041c3194 2178{
93c80368
NB
2179 unsigned char *base;
2180 cpp_chunk *result;
3fef5b2b 2181
269592a8 2182 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
93c80368
NB
2183 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2184 /* Put the chunk descriptor at the end. Then chunk overruns will
2185 cause obvious chaos. */
2186 result = (cpp_chunk *) (base + size);
2187 result->base = base;
2188 result->front = base;
2189 result->limit = base + size;
2190 result->next = 0;
417f3e3a 2191
93c80368 2192 return result;
041c3194
ZW
2193}
2194
93c80368
NB
2195void
2196_cpp_init_pool (pool, size, align, temp)
2197 cpp_pool *pool;
2198 unsigned int size, align, temp;
2199{
2200 if (align == 0)
2201 align = DEFAULT_ALIGNMENT;
2202 if (align & (align - 1))
2203 abort ();
2204 pool->align = align;
bef985f3
NB
2205 pool->first = new_chunk (size);
2206 pool->cur = pool->first;
93c80368
NB
2207 pool->locked = 0;
2208 pool->locks = 0;
2209 if (temp)
2210 pool->cur->next = pool->cur;
041c3194
ZW
2211}
2212
93c80368
NB
2213void
2214_cpp_lock_pool (pool)
2215 cpp_pool *pool;
041c3194 2216{
93c80368
NB
2217 if (pool->locks++ == 0)
2218 pool->locked = pool->cur;
041c3194
ZW
2219}
2220
93c80368
NB
2221void
2222_cpp_unlock_pool (pool)
2223 cpp_pool *pool;
041c3194 2224{
93c80368
NB
2225 if (--pool->locks == 0)
2226 pool->locked = 0;
041c3194
ZW
2227}
2228
93c80368
NB
2229void
2230_cpp_free_pool (pool)
2231 cpp_pool *pool;
3fef5b2b 2232{
bef985f3 2233 cpp_chunk *chunk = pool->first, *next;
3fef5b2b 2234
93c80368 2235 do
3fef5b2b 2236 {
93c80368
NB
2237 next = chunk->next;
2238 free (chunk->base);
2239 chunk = next;
3fef5b2b 2240 }
bef985f3 2241 while (chunk && chunk != pool->first);
041c3194 2242}
041c3194 2243
93c80368
NB
2244/* Reserve LEN bytes from a memory pool. */
2245unsigned char *
2246_cpp_pool_reserve (pool, len)
2247 cpp_pool *pool;
2248 unsigned int len;
041c3194 2249{
269592a8 2250 len = POOL_ALIGN (len, pool->align);
93c80368
NB
2251 if (len > (unsigned int) POOL_ROOM (pool))
2252 _cpp_next_chunk (pool, len, 0);
041c3194 2253
93c80368 2254 return POOL_FRONT (pool);
c5a04734
ZW
2255}
2256
93c80368
NB
2257/* Allocate LEN bytes from a memory pool. */
2258unsigned char *
2259_cpp_pool_alloc (pool, len)
2260 cpp_pool *pool;
2261 unsigned int len;
041c3194 2262{
93c80368 2263 unsigned char *result = _cpp_pool_reserve (pool, len);
417f3e3a 2264
93c80368
NB
2265 POOL_COMMIT (pool, len);
2266 return result;
041c3194 2267}
This page took 0.727183 seconds and 5 git commands to generate.