]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
c-parse.in (yyerror): Const-ification and/or static-ization.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
93c80368
NB
23/* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
27
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
041c3194 36
45b966db
ZW
37#include "config.h"
38#include "system.h"
45b966db
ZW
39#include "cpplib.h"
40#include "cpphash.h"
41
c8a96070
NB
42/* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
45#ifdef CROSS_COMPILE
46#undef MULTIBYTE_CHARS
47#endif
48
49#ifdef MULTIBYTE_CHARS
50#include "mbchar.h"
51#include <locale.h>
52#endif
53
93c80368
NB
/* Categories describing how the spelling of a token type is found.
   Tokens in category SPELL_STRING store their spelling in the token
   list, together with its length.
   NOTE(review): the length field was originally described as
   token->val.name.len, whereas the routines in this file fill in
   val.str.len -- confirm which one is current.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_STRING,
  SPELL_NONE
};
64
93c80368 65struct token_spelling
f9a0e96c 66{
93c80368
NB
67 enum spell_type category;
68 const unsigned char *name;
f9a0e96c
ZW
69};
70
93c80368
NB
71const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
72 U":>", U"<%", U"%>"};
73
74#define OP(e, s) { SPELL_OPERATOR, U s },
75#define TK(e, s) { s, U STRINGX (e) },
76const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
77#undef OP
78#undef TK
79
80#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
f2d5f0cc 82
1444f2ed 83static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
29401c30
NB
84static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
85static cppchar_t get_effective_char PARAMS ((cpp_reader *));
0d9f234d 86
041c3194 87static int skip_block_comment PARAMS ((cpp_reader *));
cbcff6df 88static int skip_line_comment PARAMS ((cpp_reader *));
0d9f234d
NB
89static void adjust_column PARAMS ((cpp_reader *));
90static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
2c3fcba6
ZW
91static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
92static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
93 const U_CHAR *));
93c80368
NB
94static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
95static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
0d9f234d 96static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
93c80368 97static void unterminated PARAMS ((cpp_reader *, int));
0d9f234d
NB
98static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
99static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
29401c30 100static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
cbcff6df 101static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
93c80368 102static int name_p PARAMS ((cpp_reader *, const cpp_string *));
62729350
NB
103static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
104 const unsigned char *, unsigned int *));
5fddcffc
NB
105static int lex_directive PARAMS ((cpp_reader *));
106static void lex_token PARAMS ((cpp_reader *, cpp_token *, int));
107static tokenrun *next_tokenrun PARAMS ((tokenrun *));
f617b8e2 108
93c80368
NB
109static cpp_chunk *new_chunk PARAMS ((unsigned int));
110static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
c8a96070 111static unsigned int hex_digit_value PARAMS ((unsigned int));
15dad1d9 112
041c3194 113/* Utility routine:
9e62c811 114
bfb9dc7f
ZW
115 Compares, the token TOKEN to the NUL-terminated string STRING.
116 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
15dad1d9 117
041c3194 118int
bfb9dc7f
ZW
119cpp_ideq (token, string)
120 const cpp_token *token;
041c3194
ZW
121 const char *string;
122{
bfb9dc7f 123 if (token->type != CPP_NAME)
041c3194 124 return 0;
bfb9dc7f 125
a28c5035 126 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
15dad1d9 127}
1368ee70 128
0d9f234d
NB
129/* Call when meeting a newline. Returns the character after the newline
130 (or carriage-return newline combination), or EOF. */
131static cppchar_t
1444f2ed
NB
132handle_newline (pfile, newline_char)
133 cpp_reader *pfile;
0d9f234d
NB
134 cppchar_t newline_char;
135{
1444f2ed 136 cpp_buffer *buffer;
0d9f234d
NB
137 cppchar_t next = EOF;
138
1444f2ed 139 pfile->line++;
1444f2ed 140 buffer = pfile->buffer;
0d9f234d 141 buffer->col_adjust = 0;
0d9f234d
NB
142 buffer->line_base = buffer->cur;
143
144 /* Handle CR-LF and LF-CR combinations, get the next character. */
145 if (buffer->cur < buffer->rlimit)
146 {
147 next = *buffer->cur++;
148 if (next + newline_char == '\r' + '\n')
149 {
150 buffer->line_base = buffer->cur;
151 if (buffer->cur < buffer->rlimit)
152 next = *buffer->cur++;
153 else
154 next = EOF;
155 }
156 }
157
158 buffer->read_ahead = next;
159 return next;
160}
161
162/* Subroutine of skip_escaped_newlines; called when a trigraph is
163 encountered. It warns if necessary, and returns true if the
164 trigraph should be honoured. FROM_CHAR is the third character of a
165 trigraph, and presumed to be the previous character for position
166 reporting. */
45b966db 167static int
0d9f234d 168trigraph_ok (pfile, from_char)
45b966db 169 cpp_reader *pfile;
0d9f234d 170 cppchar_t from_char;
45b966db 171{
041c3194
ZW
172 int accept = CPP_OPTION (pfile, trigraphs);
173
cbcff6df
NB
174 /* Don't warn about trigraphs in comments. */
175 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
45b966db 176 {
0d9f234d 177 cpp_buffer *buffer = pfile->buffer;
67821e3a 178
041c3194 179 if (accept)
67821e3a 180 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
041c3194 181 "trigraph ??%c converted to %c",
0d9f234d
NB
182 (int) from_char,
183 (int) _cpp_trigraph_map[from_char]);
4a5b68a2
NB
184 else if (buffer->cur != buffer->last_Wtrigraphs)
185 {
186 buffer->last_Wtrigraphs = buffer->cur;
67821e3a 187 cpp_warning_with_line (pfile, pfile->line,
4a5b68a2
NB
188 CPP_BUF_COL (buffer) - 2,
189 "trigraph ??%c ignored", (int) from_char);
190 }
45b966db 191 }
0d9f234d 192
041c3194 193 return accept;
45b966db
ZW
194}
195
0d9f234d
NB
/* Set the type of the token being built to T and mark the read-ahead
   character as consumed.  Assumes local variables buffer and
   result.  */
#define ACCEPT_CHAR(t) \
  do \
    { \
      result->type = t; \
      buffer->read_ahead = EOF; \
    } \
  while (0)

/* Save and restore the read position of the buffer.  Both assume
   local variables buffer and saved_cur.

   When we move to multibyte character sets, add to these something
   that saves and restores the state of the multibyte conversion
   library.  This probably involves saving and restoring a "cookie".
   In the case of glibc it is an 8-byte structure, so is not a high
   overhead operation.  In any case, it's out of the fast path.  */
#define SAVE_STATE() \
  do { saved_cur = buffer->cur; } while (0)
#define RESTORE_STATE() \
  do { buffer->cur = saved_cur; } while (0)
207
208/* Skips any escaped newlines introduced by NEXT, which is either a
209 '?' or a '\\'. Returns the next character, which will also have
a5c3cccd
NB
210 been placed in buffer->read_ahead. This routine performs
211 preprocessing stages 1 and 2 of the ISO C standard. */
0d9f234d 212static cppchar_t
29401c30
NB
213skip_escaped_newlines (pfile, next)
214 cpp_reader *pfile;
0d9f234d 215 cppchar_t next;
45b966db 216{
29401c30
NB
217 cpp_buffer *buffer = pfile->buffer;
218
a5c3cccd
NB
219 /* Only do this if we apply stages 1 and 2. */
220 if (!buffer->from_stage3)
041c3194 221 {
a5c3cccd
NB
222 cppchar_t next1;
223 const unsigned char *saved_cur;
224 int space;
225
226 do
0d9f234d 227 {
a5c3cccd
NB
228 if (buffer->cur == buffer->rlimit)
229 break;
230
231 SAVE_STATE ();
232 if (next == '?')
0d9f234d 233 {
a5c3cccd
NB
234 next1 = *buffer->cur++;
235 if (next1 != '?' || buffer->cur == buffer->rlimit)
236 {
237 RESTORE_STATE ();
238 break;
239 }
240
241 next1 = *buffer->cur++;
242 if (!_cpp_trigraph_map[next1]
29401c30 243 || !trigraph_ok (pfile, next1))
a5c3cccd
NB
244 {
245 RESTORE_STATE ();
246 break;
247 }
248
249 /* We have a full trigraph here. */
250 next = _cpp_trigraph_map[next1];
251 if (next != '\\' || buffer->cur == buffer->rlimit)
252 break;
253 SAVE_STATE ();
254 }
255
256 /* We have a backslash, and room for at least one more character. */
257 space = 0;
258 do
259 {
260 next1 = *buffer->cur++;
261 if (!is_nvspace (next1))
262 break;
263 space = 1;
0d9f234d 264 }
a5c3cccd 265 while (buffer->cur < buffer->rlimit);
041c3194 266
a5c3cccd 267 if (!is_vspace (next1))
0d9f234d
NB
268 {
269 RESTORE_STATE ();
270 break;
271 }
45b966db 272
29401c30
NB
273 if (space && !pfile->state.lexing_comment)
274 cpp_warning (pfile, "backslash and newline separated by space");
0d9f234d 275
29401c30 276 next = handle_newline (pfile, next1);
a5c3cccd 277 if (next == EOF)
29401c30 278 cpp_pedwarn (pfile, "backslash-newline at end of file");
0d9f234d 279 }
a5c3cccd 280 while (next == '\\' || next == '?');
041c3194 281 }
45b966db 282
0d9f234d
NB
283 buffer->read_ahead = next;
284 return next;
45b966db
ZW
285}
286
0d9f234d
NB
287/* Obtain the next character, after trigraph conversion and skipping
288 an arbitrary string of escaped newlines. The common case of no
289 trigraphs or escaped newlines falls through quickly. */
290static cppchar_t
29401c30
NB
291get_effective_char (pfile)
292 cpp_reader *pfile;
64aaf407 293{
29401c30 294 cpp_buffer *buffer = pfile->buffer;
0d9f234d
NB
295 cppchar_t next = EOF;
296
297 if (buffer->cur < buffer->rlimit)
298 {
299 next = *buffer->cur++;
300
301 /* '?' can introduce trigraphs (and therefore backslash); '\\'
302 can introduce escaped newlines, which we want to skip, or
303 UCNs, which, depending upon lexer state, we will handle in
304 the future. */
305 if (next == '?' || next == '\\')
29401c30 306 next = skip_escaped_newlines (pfile, next);
0d9f234d
NB
307 }
308
309 buffer->read_ahead = next;
310 return next;
64aaf407
NB
311}
312
0d9f234d
NB
313/* Skip a C-style block comment. We find the end of the comment by
314 seeing if an asterisk is before every '/' we encounter. Returns
315 non-zero if comment terminated by EOF, zero otherwise. */
041c3194
ZW
316static int
317skip_block_comment (pfile)
45b966db
ZW
318 cpp_reader *pfile;
319{
041c3194 320 cpp_buffer *buffer = pfile->buffer;
d8090680 321 cppchar_t c = EOF, prevc = EOF;
0d9f234d 322
cbcff6df 323 pfile->state.lexing_comment = 1;
0d9f234d 324 while (buffer->cur != buffer->rlimit)
45b966db 325 {
0d9f234d
NB
326 prevc = c, c = *buffer->cur++;
327
328 next_char:
329 /* FIXME: For speed, create a new character class of characters
93c80368 330 of interest inside block comments. */
0d9f234d 331 if (c == '?' || c == '\\')
29401c30 332 c = skip_escaped_newlines (pfile, c);
041c3194 333
0d9f234d
NB
334 /* People like decorating comments with '*', so check for '/'
335 instead for efficiency. */
041c3194 336 if (c == '/')
45b966db 337 {
0d9f234d
NB
338 if (prevc == '*')
339 break;
041c3194 340
0d9f234d
NB
341 /* Warn about potential nested comments, but not if the '/'
342 comes immediately before the true comment delimeter.
041c3194 343 Don't bother to get it right across escaped newlines. */
0d9f234d
NB
344 if (CPP_OPTION (pfile, warn_comments)
345 && buffer->cur != buffer->rlimit)
45b966db 346 {
0d9f234d
NB
347 prevc = c, c = *buffer->cur++;
348 if (c == '*' && buffer->cur != buffer->rlimit)
349 {
350 prevc = c, c = *buffer->cur++;
351 if (c != '/')
67821e3a
NB
352 cpp_warning_with_line (pfile, pfile->line,
353 CPP_BUF_COL (buffer) - 2,
0d9f234d
NB
354 "\"/*\" within comment");
355 }
356 goto next_char;
45b966db 357 }
45b966db 358 }
91fcd158 359 else if (is_vspace (c))
45b966db 360 {
1444f2ed 361 prevc = c, c = handle_newline (pfile, c);
0d9f234d 362 goto next_char;
45b966db 363 }
52fadca8 364 else if (c == '\t')
0d9f234d 365 adjust_column (pfile);
45b966db 366 }
041c3194 367
cbcff6df 368 pfile->state.lexing_comment = 0;
0d9f234d
NB
369 buffer->read_ahead = EOF;
370 return c != '/' || prevc != '*';
45b966db
ZW
371}
372
f9a0e96c 373/* Skip a C++ line comment. Handles escaped newlines. Returns
0d9f234d
NB
374 non-zero if a multiline comment. The following new line, if any,
375 is left in buffer->read_ahead. */
041c3194 376static int
cbcff6df
NB
377skip_line_comment (pfile)
378 cpp_reader *pfile;
45b966db 379{
cbcff6df 380 cpp_buffer *buffer = pfile->buffer;
67821e3a 381 unsigned int orig_line = pfile->line;
0d9f234d 382 cppchar_t c;
041c3194 383
cbcff6df 384 pfile->state.lexing_comment = 1;
0d9f234d 385 do
041c3194 386 {
0d9f234d
NB
387 c = EOF;
388 if (buffer->cur == buffer->rlimit)
389 break;
041c3194 390
0d9f234d
NB
391 c = *buffer->cur++;
392 if (c == '?' || c == '\\')
29401c30 393 c = skip_escaped_newlines (pfile, c);
041c3194 394 }
0d9f234d 395 while (!is_vspace (c));
45b966db 396
cbcff6df 397 pfile->state.lexing_comment = 0;
0d9f234d 398 buffer->read_ahead = c; /* Leave any newline for caller. */
67821e3a 399 return orig_line != pfile->line;
041c3194 400}
45b966db 401
0d9f234d
NB
402/* pfile->buffer->cur is one beyond the \t character. Update
403 col_adjust so we track the column correctly. */
52fadca8 404static void
0d9f234d 405adjust_column (pfile)
52fadca8 406 cpp_reader *pfile;
52fadca8 407{
0d9f234d
NB
408 cpp_buffer *buffer = pfile->buffer;
409 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
52fadca8
NB
410
411 /* Round it up to multiple of the tabstop, but subtract 1 since the
412 tab itself occupies a character position. */
0d9f234d
NB
413 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
414 - col % CPP_OPTION (pfile, tabstop)) - 1;
52fadca8
NB
415}
416
0d9f234d
NB
417/* Skips whitespace, saving the next non-whitespace character.
418 Adjusts pfile->col_adjust to account for tabs. Without this,
419 tokens might be assigned an incorrect column. */
041c3194 420static void
0d9f234d 421skip_whitespace (pfile, c)
041c3194 422 cpp_reader *pfile;
0d9f234d 423 cppchar_t c;
041c3194
ZW
424{
425 cpp_buffer *buffer = pfile->buffer;
0d9f234d 426 unsigned int warned = 0;
45b966db 427
0d9f234d 428 do
041c3194 429 {
91fcd158
NB
430 /* Horizontal space always OK. */
431 if (c == ' ')
0d9f234d 432 ;
91fcd158 433 else if (c == '\t')
0d9f234d
NB
434 adjust_column (pfile);
435 /* Just \f \v or \0 left. */
91fcd158 436 else if (c == '\0')
041c3194 437 {
91fcd158 438 if (!warned)
0d9f234d
NB
439 {
440 cpp_warning (pfile, "null character(s) ignored");
441 warned = 1;
442 }
45b966db 443 }
93c80368 444 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
67821e3a 445 cpp_pedwarn_with_line (pfile, pfile->line,
91fcd158
NB
446 CPP_BUF_COL (buffer),
447 "%s in preprocessing directive",
448 c == '\f' ? "form feed" : "vertical tab");
0d9f234d
NB
449
450 c = EOF;
451 if (buffer->cur == buffer->rlimit)
452 break;
453 c = *buffer->cur++;
45b966db 454 }
ec5c56db 455 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
0d9f234d
NB
456 while (is_nvspace (c));
457
458 /* Remember the next character. */
459 buffer->read_ahead = c;
041c3194 460}
45b966db 461
93c80368
NB
462/* See if the characters of a number token are valid in a name (no
463 '.', '+' or '-'). */
464static int
465name_p (pfile, string)
466 cpp_reader *pfile;
467 const cpp_string *string;
468{
469 unsigned int i;
470
471 for (i = 0; i < string->len; i++)
472 if (!is_idchar (string->text[i]))
473 return 0;
474
475 return 1;
476}
477
2c3fcba6
ZW
478/* Parse an identifier, skipping embedded backslash-newlines. This is
479 a critical inner loop. The common case is an identifier which has
480 not been split by backslash-newline, does not contain a dollar
481 sign, and has already been scanned (roughly 10:1 ratio of
482 seen:unseen identifiers in normal code; the distribution is
483 Poisson-like). Second most common case is a new identifier, not
484 split and no dollar sign. The other possibilities are rare and
485 have been relegated to parse_identifier_slow. */
0d9f234d
NB
486
487static cpp_hashnode *
2c3fcba6 488parse_identifier (pfile)
45b966db 489 cpp_reader *pfile;
45b966db 490{
93c80368 491 cpp_hashnode *result;
2c3fcba6
ZW
492 const U_CHAR *cur, *rlimit;
493
494 /* Fast-path loop. Skim over a normal identifier.
495 N.B. ISIDNUM does not include $. */
496 cur = pfile->buffer->cur - 1;
497 rlimit = pfile->buffer->rlimit;
498 do
499 cur++;
500 while (cur < rlimit && ISIDNUM (*cur));
501
502 /* Check for slow-path cases. */
503 if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
504 result = parse_identifier_slow (pfile, cur);
505 else
506 {
507 const U_CHAR *base = pfile->buffer->cur - 1;
508 result = (cpp_hashnode *)
509 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
510 pfile->buffer->cur = cur;
511 }
512
513 /* Rarely, identifiers require diagnostics when lexed.
514 XXX Has to be forced out of the fast path. */
515 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
516 && !pfile->state.skipping, 0))
517 {
518 /* It is allowed to poison the same identifier twice. */
519 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
520 cpp_error (pfile, "attempt to use poisoned \"%s\"",
521 NODE_NAME (result));
522
523 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
524 replacement list of a variadic macro. */
525 if (result == pfile->spec_nodes.n__VA_ARGS__
526 && !pfile->state.va_args_ok)
527 cpp_pedwarn (pfile,
528 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
529 }
530
531 return result;
532}
533
534/* Slow path. This handles identifiers which have been split, and
535 identifiers which contain dollar signs. The part of the identifier
536 from PFILE->buffer->cur-1 to CUR has already been scanned. */
537static cpp_hashnode *
538parse_identifier_slow (pfile, cur)
539 cpp_reader *pfile;
540 const U_CHAR *cur;
541{
0d9f234d 542 cpp_buffer *buffer = pfile->buffer;
2c3fcba6 543 const U_CHAR *base = buffer->cur - 1;
2a967f3d 544 struct obstack *stack = &pfile->hash_table->stack;
2c3fcba6
ZW
545 unsigned int c, saw_dollar = 0, len;
546
547 /* Copy the part of the token which is known to be okay. */
548 obstack_grow (stack, base, cur - base);
041c3194 549
2c3fcba6
ZW
550 /* Now process the part which isn't. We are looking at one of
551 '$', '\\', or '?' on entry to this loop. */
552 c = *cur++;
553 buffer->cur = cur;
0d9f234d 554 do
041c3194 555 {
2c3fcba6
ZW
556 while (is_idchar (c))
557 {
558 obstack_1grow (stack, c);
45b966db 559
2c3fcba6
ZW
560 if (c == '$')
561 saw_dollar++;
ba89d661 562
2c3fcba6
ZW
563 c = EOF;
564 if (buffer->cur == buffer->rlimit)
565 break;
ba89d661 566
2c3fcba6
ZW
567 c = *buffer->cur++;
568 }
ba89d661 569
0d9f234d
NB
570 /* Potential escaped newline? */
571 if (c != '?' && c != '\\')
2c3fcba6 572 break;
29401c30 573 c = skip_escaped_newlines (pfile, c);
041c3194 574 }
0d9f234d
NB
575 while (is_idchar (c));
576
93c80368
NB
577 /* Remember the next character. */
578 buffer->read_ahead = c;
579
0d9f234d
NB
580 /* $ is not a identifier character in the standard, but is commonly
581 accepted as an extension. Don't warn about it in skipped
582 conditional blocks. */
cef0d199 583 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
0d9f234d
NB
584 cpp_pedwarn (pfile, "'$' character(s) in identifier");
585
93c80368 586 /* Identifiers are null-terminated. */
2a967f3d
NB
587 len = obstack_object_size (stack);
588 obstack_1grow (stack, '\0');
93c80368 589
2c3fcba6 590 return (cpp_hashnode *)
2a967f3d 591 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
45b966db
ZW
592}
593
0d9f234d 594/* Parse a number, skipping embedded backslash-newlines. */
45b966db 595static void
93c80368 596parse_number (pfile, number, c, leading_period)
45b966db 597 cpp_reader *pfile;
0d9f234d
NB
598 cpp_string *number;
599 cppchar_t c;
93c80368 600 int leading_period;
45b966db 601{
041c3194 602 cpp_buffer *buffer = pfile->buffer;
49fe13f6 603 cpp_pool *pool = &pfile->ident_pool;
93c80368 604 unsigned char *dest, *limit;
45b966db 605
93c80368
NB
606 dest = POOL_FRONT (pool);
607 limit = POOL_LIMIT (pool);
cbcff6df 608
93c80368
NB
609 /* Place a leading period. */
610 if (leading_period)
611 {
612 if (dest >= limit)
613 limit = _cpp_next_chunk (pool, 0, &dest);
614 *dest++ = '.';
615 }
616
0d9f234d 617 do
041c3194 618 {
0d9f234d
NB
619 do
620 {
93c80368
NB
621 /* Need room for terminating null. */
622 if (dest + 1 >= limit)
623 limit = _cpp_next_chunk (pool, 0, &dest);
624 *dest++ = c;
0d9f234d 625
0d9f234d
NB
626 c = EOF;
627 if (buffer->cur == buffer->rlimit)
628 break;
45b966db 629
0d9f234d
NB
630 c = *buffer->cur++;
631 }
93c80368 632 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
45b966db 633
0d9f234d
NB
634 /* Potential escaped newline? */
635 if (c != '?' && c != '\\')
636 break;
29401c30 637 c = skip_escaped_newlines (pfile, c);
45b966db 638 }
93c80368 639 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
cbcff6df 640
0d9f234d
NB
641 /* Remember the next character. */
642 buffer->read_ahead = c;
64aaf407 643
93c80368
NB
644 /* Null-terminate the number. */
645 *dest = '\0';
646
647 number->text = POOL_FRONT (pool);
648 number->len = dest - number->text;
649 POOL_COMMIT (pool, number->len + 1);
0d9f234d
NB
650}
651
652/* Subroutine of parse_string. Emits error for unterminated strings. */
653static void
93c80368 654unterminated (pfile, term)
0d9f234d 655 cpp_reader *pfile;
0d9f234d
NB
656 int term;
657{
658 cpp_error (pfile, "missing terminating %c character", term);
659
93c80368
NB
660 if (term == '\"' && pfile->mlstring_pos.line
661 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
041c3194 662 {
93c80368
NB
663 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
664 pfile->mlstring_pos.col,
0d9f234d 665 "possible start of unterminated string literal");
93c80368 666 pfile->mlstring_pos.line = 0;
041c3194 667 }
45b966db
ZW
668}
669
93c80368
NB
670/* Subroutine of parse_string. */
671static int
672unescaped_terminator_p (pfile, dest)
673 cpp_reader *pfile;
674 const unsigned char *dest;
675{
676 const unsigned char *start, *temp;
677
678 /* In #include-style directives, terminators are not escapeable. */
679 if (pfile->state.angled_headers)
680 return 1;
681
49fe13f6 682 start = POOL_FRONT (&pfile->ident_pool);
93c80368
NB
683
684 /* An odd number of consecutive backslashes represents an escaped
685 terminator. */
686 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
687 ;
688
689 return ((dest - temp) & 1) == 0;
690}
691
0d9f234d 692/* Parses a string, character constant, or angle-bracketed header file
7868b4a2
NB
693 name. Handles embedded trigraphs and escaped newlines. The stored
694 string is guaranteed NUL-terminated, but it is not guaranteed that
695 this is the first NUL since embedded NULs are preserved.
45b966db 696
7868b4a2 697 Multi-line strings are allowed, but they are deprecated. */
041c3194 698static void
0d9f234d 699parse_string (pfile, token, terminator)
45b966db 700 cpp_reader *pfile;
041c3194 701 cpp_token *token;
0d9f234d 702 cppchar_t terminator;
45b966db 703{
041c3194 704 cpp_buffer *buffer = pfile->buffer;
49fe13f6 705 cpp_pool *pool = &pfile->ident_pool;
93c80368 706 unsigned char *dest, *limit;
0d9f234d 707 cppchar_t c;
d82fc108 708 bool warned_nulls = false, warned_multi = false;
0d9f234d 709
93c80368
NB
710 dest = POOL_FRONT (pool);
711 limit = POOL_LIMIT (pool);
712
0d9f234d 713 for (;;)
45b966db 714 {
0d9f234d 715 if (buffer->cur == buffer->rlimit)
7868b4a2
NB
716 c = EOF;
717 else
718 c = *buffer->cur++;
719
720 have_char:
721 /* We need space for the terminating NUL. */
722 if (dest >= limit)
723 limit = _cpp_next_chunk (pool, 0, &dest);
724
725 if (c == EOF)
0d9f234d 726 {
93c80368 727 unterminated (pfile, terminator);
0d9f234d
NB
728 break;
729 }
0d9f234d 730
0d9f234d
NB
731 /* Handle trigraphs, escaped newlines etc. */
732 if (c == '?' || c == '\\')
29401c30 733 c = skip_escaped_newlines (pfile, c);
45b966db 734
93c80368 735 if (c == terminator && unescaped_terminator_p (pfile, dest))
45b966db 736 {
93c80368
NB
737 c = EOF;
738 break;
0d9f234d
NB
739 }
740 else if (is_vspace (c))
741 {
742 /* In assembly language, silently terminate string and
743 character literals at end of line. This is a kludge
744 around not knowing where comments are. */
bdb05a7b 745 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
0d9f234d 746 break;
45b966db 747
0d9f234d
NB
748 /* Character constants and header names may not extend over
749 multiple lines. In Standard C, neither may strings.
750 Unfortunately, we accept multiline strings as an
16eb2788
NB
751 extension, except in #include family directives. */
752 if (terminator != '"' || pfile->state.angled_headers)
45b966db 753 {
93c80368 754 unterminated (pfile, terminator);
0d9f234d 755 break;
45b966db 756 }
45b966db 757
d82fc108
NB
758 if (!warned_multi)
759 {
760 warned_multi = true;
761 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
762 }
763
93c80368 764 if (pfile->mlstring_pos.line == 0)
c691145a 765 pfile->mlstring_pos = pfile->lexer_pos;
0d9f234d 766
1444f2ed 767 c = handle_newline (pfile, c);
7868b4a2
NB
768 *dest++ = '\n';
769 goto have_char;
0d9f234d 770 }
d82fc108 771 else if (c == '\0' && !warned_nulls)
0d9f234d 772 {
d82fc108
NB
773 warned_nulls = true;
774 cpp_warning (pfile, "null character(s) preserved in literal");
45b966db 775 }
45b966db 776
93c80368 777 *dest++ = c;
45b966db
ZW
778 }
779
93c80368 780 /* Remember the next character. */
0d9f234d 781 buffer->read_ahead = c;
7868b4a2 782 *dest = '\0';
45b966db 783
93c80368
NB
784 token->val.str.text = POOL_FRONT (pool);
785 token->val.str.len = dest - token->val.str.text;
7868b4a2 786 POOL_COMMIT (pool, token->val.str.len + 1);
0d9f234d 787}
041c3194 788
93c80368 789/* The stored comment includes the comment start and any terminator. */
9e62c811 790static void
0d9f234d
NB
791save_comment (pfile, token, from)
792 cpp_reader *pfile;
041c3194
ZW
793 cpp_token *token;
794 const unsigned char *from;
9e62c811 795{
041c3194 796 unsigned char *buffer;
0d9f234d 797 unsigned int len;
0d9f234d 798
1c6d33ef 799 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
3542203b
NB
800 /* C++ comments probably (not definitely) have moved past a new
801 line, which we don't want to save in the comment. */
802 if (pfile->buffer->read_ahead != EOF)
803 len--;
49fe13f6 804 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
041c3194 805
041c3194 806 token->type = CPP_COMMENT;
bfb9dc7f 807 token->val.str.len = len;
0d9f234d 808 token->val.str.text = buffer;
45b966db 809
1c6d33ef
NB
810 buffer[0] = '/';
811 memcpy (buffer + 1, from, len - 1);
0d9f234d 812}
45b966db 813
cbcff6df
NB
814/* Subroutine of lex_token to handle '%'. A little tricky, since we
815 want to avoid stepping back when lexing %:%X. */
0d9f234d 816static void
29401c30
NB
817lex_percent (pfile, result)
818 cpp_reader *pfile;
0d9f234d 819 cpp_token *result;
0d9f234d 820{
29401c30 821 cpp_buffer *buffer= pfile->buffer;
cbcff6df
NB
822 cppchar_t c;
823
824 result->type = CPP_MOD;
825 /* Parsing %:%X could leave an extra character. */
826 if (buffer->extra_char == EOF)
29401c30 827 c = get_effective_char (pfile);
cbcff6df
NB
828 else
829 {
830 c = buffer->read_ahead = buffer->extra_char;
831 buffer->extra_char = EOF;
832 }
833
834 if (c == '=')
835 ACCEPT_CHAR (CPP_MOD_EQ);
29401c30 836 else if (CPP_OPTION (pfile, digraphs))
cbcff6df
NB
837 {
838 if (c == ':')
839 {
840 result->flags |= DIGRAPH;
841 ACCEPT_CHAR (CPP_HASH);
29401c30 842 if (get_effective_char (pfile) == '%')
cbcff6df 843 {
29401c30 844 buffer->extra_char = get_effective_char (pfile);
cbcff6df
NB
845 if (buffer->extra_char == ':')
846 {
847 buffer->extra_char = EOF;
848 ACCEPT_CHAR (CPP_PASTE);
849 }
850 else
851 /* We'll catch the extra_char when we're called back. */
852 buffer->read_ahead = '%';
853 }
854 }
855 else if (c == '>')
856 {
857 result->flags |= DIGRAPH;
858 ACCEPT_CHAR (CPP_CLOSE_BRACE);
859 }
860 }
861}
862
863/* Subroutine of lex_token to handle '.'. This is tricky, since we
864 want to avoid stepping back when lexing '...' or '.123'. In the
865 latter case we should also set a flag for parse_number. */
866static void
867lex_dot (pfile, result)
868 cpp_reader *pfile;
869 cpp_token *result;
870{
871 cpp_buffer *buffer = pfile->buffer;
872 cppchar_t c;
873
874 /* Parsing ..X could leave an extra character. */
875 if (buffer->extra_char == EOF)
29401c30 876 c = get_effective_char (pfile);
cbcff6df
NB
877 else
878 {
879 c = buffer->read_ahead = buffer->extra_char;
880 buffer->extra_char = EOF;
881 }
0d9f234d 882
cbcff6df
NB
883 /* All known character sets have 0...9 contiguous. */
884 if (c >= '0' && c <= '9')
885 {
886 result->type = CPP_NUMBER;
93c80368 887 parse_number (pfile, &result->val.str, c, 1);
cbcff6df 888 }
041c3194 889 else
ea4a453b 890 {
cbcff6df
NB
891 result->type = CPP_DOT;
892 if (c == '.')
893 {
29401c30 894 buffer->extra_char = get_effective_char (pfile);
cbcff6df
NB
895 if (buffer->extra_char == '.')
896 {
897 buffer->extra_char = EOF;
898 ACCEPT_CHAR (CPP_ELLIPSIS);
899 }
900 else
901 /* We'll catch the extra_char when we're called back. */
902 buffer->read_ahead = '.';
903 }
904 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
905 ACCEPT_CHAR (CPP_DOT_STAR);
ea4a453b 906 }
45b966db
ZW
907}
908
5fddcffc
NB
909/* Allocate COUNT tokens for RUN. */
910void
911_cpp_init_tokenrun (run, count)
912 tokenrun *run;
913 unsigned int count;
914{
915 run->base = xnewvec (cpp_token, count);
916 run->limit = run->base + count;
917 run->next = NULL;
918}
919
920/* Returns the next tokenrun, or creates one if there is none. */
921static tokenrun *
922next_tokenrun (run)
923 tokenrun *run;
924{
925 if (run->next == NULL)
926 {
927 run->next = xnew (tokenrun);
928 _cpp_init_tokenrun (run->next, 250);
929 }
930
931 return run->next;
932}
933
934static int
935lex_directive (pfile)
936 cpp_reader *pfile;
937{
938 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
939 tokens within the list of arguments that would otherwise act as
940 preprocessing directives, the behavior is undefined.
941
942 This implementation will report a hard error, terminate the macro
943 invocation, and proceed to process the directive. */
944 if (pfile->state.parsing_args)
945 {
946 pfile->lexer_pos.output_line = pfile->line;
947 if (pfile->state.parsing_args == 2)
948 {
949 cpp_error (pfile,
950 "directives may not be used inside a macro argument");
951 pfile->state.bol = 1;
952 pfile->buffer->cur = pfile->buffer->line_base;
953 pfile->buffer->read_ahead = EOF;
954 pfile->cur_token->type = CPP_EOF;
955 }
956
957 return 0;
958 }
959
960 /* This is a directive. If the return value is false, it is an
961 assembler #. */
962 {
963 /* FIXME: short-term kludge only - it doesn't handle the case that
964 the # is at the end of a run and we moved to the start of the
965 next one. Easily fixed once we kill lookaheads. */
966 cpp_token *token = pfile->cur_token++;
967 if (_cpp_handle_directive (pfile, token->flags & PREV_WHITE))
968 return 1;
969 pfile->cur_token = token;
970 return 0;
971 }
972}
973
974/* Lex a token into RESULT (external interface). */
93c80368
NB
975void
976_cpp_lex_token (pfile, result)
45b966db 977 cpp_reader *pfile;
0d9f234d 978 cpp_token *result;
5fddcffc
NB
979{
980 if (pfile->cur_token == pfile->cur_run->limit)
981 {
982 pfile->cur_run = next_tokenrun (pfile->cur_run);
983 pfile->cur_token = pfile->cur_run->base;
984 }
985
986 next_token:
987 if (pfile->state.bol)
988 {
989 start_new_line:
990 pfile->state.bol = 0;
991
992 /* Return lexer back to base. */
993 if (!pfile->keep_tokens)
994 {
995 pfile->cur_run = &pfile->base_run;
996 pfile->cur_token = pfile->base_run.base;
997 }
998
999 lex_token (pfile, pfile->cur_token, 1);
1000 pfile->lexer_pos.output_line = pfile->cur_token->line;
1001 if (pfile->cur_token->type == CPP_HASH && lex_directive (pfile))
1002 goto start_new_line;
1003 }
1004 else
1005 {
1006 lex_token (pfile, pfile->cur_token, 0);
1007 if (pfile->cur_token->type == CPP_EOF)
1008 {
1009 if (!pfile->state.in_directive)
1010 goto start_new_line;
1011 /* Decrementing pfile->line allows directives to recognise
1012 that the newline has been seen, and also means that
1013 diagnostics don't point to the next line. */
1014 pfile->lexer_pos.output_line = pfile->line--;
1015 }
1016 }
1017
1018 if (!pfile->state.in_directive)
1019 {
1020 if (pfile->state.skipping && pfile->cur_token->type != CPP_EOF)
1021 goto next_token;
1022
1023 /* Outside a directive, invalidate controlling macros. */
1024 pfile->mi_valid = false;
1025 }
1026
1027 *result = *pfile->cur_token++;
1028}
1029
1030/* Lex a token into RESULT (internal interface).

   Fills in RESULT's flags, line, col and type, and for some token
   types its val member.  If SKIP_NEWLINES is nonzero, a newline that
   has been handled restarts lexing on the following line; otherwise
   the newline ends the token stream for this call with a CPP_EOF
   token.  */
1031static void
1032lex_token (pfile, result, skip_newlines)
1033 cpp_reader *pfile;
1034 cpp_token *result;
1035 int skip_newlines;
45b966db 1036{
0d9f234d 1037 cppchar_t c;
adb84b42 1038 cpp_buffer *buffer;
0d9f234d 1039 const unsigned char *comment_start;
9ec7291f 1040
/* Re-entered after a newline or after popping a buffer, so the
   buffer pointer is fetched again from PFILE each time.  */
5fddcffc 1041 fresh_line:
adb84b42 1042 buffer = pfile->buffer;
bd969772
NB
/* Start from the flags saved on the buffer, then clear them there.  */
1043 result->flags = buffer->saved_flags;
1044 buffer->saved_flags = 0;
5fddcffc 1045 update_tokens_line:
67821e3a 1046 pfile->lexer_pos.line = pfile->line;
1444f2ed 1047 result->line = pfile->line;
041c3194 1048
5fddcffc 1049 skipped_white:
0d9f234d
NB
/* Use the pending read-ahead character if there is one, otherwise the
   next byte of the buffer.  C stays EOF when neither is available.  */
1050 c = buffer->read_ahead;
1051 if (c == EOF && buffer->cur < buffer->rlimit)
5fddcffc
NB
1052 c = *buffer->cur++;
1053 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1054 pfile->lexer_pos.col = result->col;
0d9f234d 1055 buffer->read_ahead = EOF;
5fddcffc
NB
1056
1057 trigraph:
0d9f234d 1058 switch (c)
45b966db 1059 {
0d9f234d 1060 case EOF:
/* Buffer-end handling is skipped while collecting macro arguments or
   inside a directive; in those cases only CPP_EOF is returned.  */
5fddcffc 1061 if (!pfile->state.parsing_args && !pfile->state.in_directive)
ef6e958a 1062 {
/* At the start of a line: the buffer ended cleanly.  */
5fddcffc 1063 if (buffer->cur == buffer->line_base)
7364fdd8 1064 {
5fddcffc
NB
1065 /* Don't pop the last buffer. */
1066 if (buffer->prev)
1067 {
/* Read return_at_eof before the pop, then decide whether to carry on
   lexing or to report CPP_EOF to the caller.  */
1068 unsigned char stop = buffer->return_at_eof;
7364fdd8 1069
5fddcffc
NB
1070 _cpp_pop_buffer (pfile);
1071 if (!stop)
1072 goto fresh_line;
1073 }
1074 }
1075 else
1076 {
1077 /* Non-empty files should end in a newline. Don't warn
1078 for command line and _Pragma buffers. */
1079 if (!buffer->from_stage3)
1080 cpp_pedwarn (pfile, "no newline at end of file");
1081 handle_newline (pfile, '\n');
7364fdd8 1082 }
ef6e958a 1083 }
0d9f234d 1084 result->type = CPP_EOF;
5fddcffc 1085 break;
45b966db 1086
0d9f234d
NB
1087 case ' ': case '\t': case '\f': case '\v': case '\0':
1088 skip_whitespace (pfile, c);
1089 result->flags |= PREV_WHITE;
5fddcffc 1090 goto skipped_white;
0d9f234d
NB
1091
1092 case '\n': case '\r':
5fddcffc
NB
/* In a directive while collecting macro arguments the newline is not
   consumed: it is put back in read_ahead and CPP_EOF is returned.  */
1093 if (pfile->state.in_directive && pfile->state.parsing_args)
1094 buffer->read_ahead = c;
1095 else
45b966db 1096 {
5fddcffc
NB
1097 handle_newline (pfile, c);
1098 if (skip_newlines)
1099 goto fresh_line;
45b966db 1100 }
5fddcffc
NB
1101 result->type = CPP_EOF;
1102 break;
46d07497 1103
0d9f234d
NB
1104 case '?':
1105 case '\\':
1106 /* These could start an escaped newline, or '?' a trigraph. Let
1107 skip_escaped_newlines do all the work. */
1108 {
67821e3a 1109 unsigned int line = pfile->line;
0d9f234d 1110
29401c30 1111 c = skip_escaped_newlines (pfile, c);
67821e3a 1112 if (line != pfile->line)
0d9f234d
NB
1113 /* We had at least one escaped newline of some sort, and the
1114 next character is in buffer->read_ahead. Update the
1115 token's line and column. */
5fddcffc 1116 goto update_tokens_line;
0d9f234d
NB
1117
1118 /* We are either the original '?' or '\\', or a trigraph. */
1119 result->type = CPP_QUERY;
1120 buffer->read_ahead = EOF;
/* A backslash becomes a CPP_OTHER token; any character other than
   '?' is dispatched through the switch again.  */
1121 if (c == '\\')
12c4f523 1122 goto random_char;
0d9f234d 1123 else if (c != '?')
5fddcffc 1124 goto trigraph;
0d9f234d
NB
1125 }
1126 break;
46d07497 1127
0d9f234d
NB
1128 case '0': case '1': case '2': case '3': case '4':
1129 case '5': case '6': case '7': case '8': case '9':
1130 result->type = CPP_NUMBER;
93c80368 1131 parse_number (pfile, &result->val.str, c, 0);
0d9f234d 1132 break;
46d07497 1133
0d9f234d
NB
1134 case '$':
1135 if (!CPP_OPTION (pfile, dollars_in_ident))
1136 goto random_char;
ec5c56db 1137 /* Fall through... */
0d9f234d
NB
1138
1139 case '_':
1140 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1141 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1142 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1143 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1144 case 'y': case 'z':
1145 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1146 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1147 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1148 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1149 case 'Y': case 'Z':
1150 result->type = CPP_NAME;
2c3fcba6 1151 result->val.node = parse_identifier (pfile);
0d9f234d
NB
1152
1153 /* 'L' may introduce wide characters or strings. */
93c80368 1154 if (result->val.node == pfile->spec_nodes.n_L)
0d9f234d 1155 {
2c3fcba6
ZW
/* Peek at the character after the 'L'; buffer->cur is only advanced
   below once that character is known to be a quote.  */
1156 c = buffer->read_ahead;
1157 if (c == EOF && buffer->cur < buffer->rlimit)
1158 c = *buffer->cur;
0d9f234d 1159 if (c == '\'' || c == '"')
ba89d661 1160 {
2c3fcba6 1161 buffer->cur++;
0d9f234d
NB
1162 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1163 goto make_string;
ba89d661 1164 }
0d9f234d
NB
1165 }
1166 /* Convert named operators to their proper types. */
93c80368 1167 else if (result->val.node->flags & NODE_OPERATOR)
0d9f234d
NB
1168 {
1169 result->flags |= NAMED_OP;
93c80368 1170 result->type = result->val.node->value.operator;
0d9f234d
NB
1171 }
1172 break;
1173
1174 case '\'':
1175 case '"':
1176 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
/* C is passed to parse_string; the '<' case below sets it to '>' and
   calls it the terminator.  */
1177 make_string:
1178 parse_string (pfile, result, c);
1179 break;
041c3194 1180
0d9f234d 1181 case '/':
1c6d33ef
NB
1182 /* A potential block or line comment. */
1183 comment_start = buffer->cur;
0d9f234d 1184 result->type = CPP_DIV;
29401c30 1185 c = get_effective_char (pfile);
0d9f234d
NB
1186 if (c == '=')
1187 ACCEPT_CHAR (CPP_DIV_EQ);
1c6d33ef
NB
/* Anything that does not open a comment ends the token here; this
   includes the '/=' case just accepted, where C is '='.  */
1188 if (c != '/' && c != '*')
1189 break;
e61fc951 1190
1c6d33ef
NB
1191 if (c == '*')
1192 {
0d9f234d 1193 if (skip_block_comment (pfile))
67821e3a 1194 cpp_error (pfile, "unterminated comment");
0d9f234d 1195 }
1c6d33ef 1196 else
0d9f234d 1197 {
1c6d33ef
NB
/* '//' is only treated as a comment when C++ comments are enabled or
   we are in a system header; otherwise RESULT stays CPP_DIV.  */
1198 if (!CPP_OPTION (pfile, cplusplus_comments)
1199 && !CPP_IN_SYSTEM_HEADER (pfile))
1200 break;
1201
bdb05a7b
NB
1202 /* Warn about comments only if pedantically GNUC89, and not
1203 in system headers. */
1204 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 1205 && ! buffer->warned_cplusplus_comments)
041c3194 1206 {
1c6d33ef
NB
1207 cpp_pedwarn (pfile,
1208 "C++ style comments are not allowed in ISO C89");
1209 cpp_pedwarn (pfile,
1210 "(this will be reported only once per input file)");
1211 buffer->warned_cplusplus_comments = 1;
1212 }
0d9f234d 1213
a94c1199 1214 /* Skip_line_comment updates buffer->read_ahead. */
01ef6563 1215 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
93c80368
NB
1216 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1217 pfile->lexer_pos.col,
1c6d33ef
NB
1218 "multi-line comment");
1219 }
0d9f234d 1220
1c6d33ef
NB
1221 /* Skipping the comment has updated buffer->read_ahead. */
/* A discarded comment counts as whitespace before the next token.  */
1222 if (!pfile->state.save_comments)
1223 {
1224 result->flags |= PREV_WHITE;
5fddcffc 1225 goto update_tokens_line;
0d9f234d 1226 }
1c6d33ef
NB
1227
1228 /* Save the comment as a token in its own right. */
1229 save_comment (pfile, result, comment_start);
29b10746
NB
1230 /* Don't do MI optimisation. */
1231 return;
0d9f234d
NB
1232
1233 case '<':
/* When angled_headers is set, '<' opens a header name that runs to
   the matching '>'.  */
1234 if (pfile->state.angled_headers)
1235 {
1236 result->type = CPP_HEADER_NAME;
1237 c = '>'; /* terminator. */
1238 goto make_string;
1239 }
45b966db 1240
0d9f234d 1241 result->type = CPP_LESS;
29401c30 1242 c = get_effective_char (pfile);
0d9f234d
NB
1243 if (c == '=')
1244 ACCEPT_CHAR (CPP_LESS_EQ);
1245 else if (c == '<')
1246 {
1247 ACCEPT_CHAR (CPP_LSHIFT);
29401c30 1248 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1249 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1250 }
1251 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1252 {
1253 ACCEPT_CHAR (CPP_MIN);
29401c30 1254 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1255 ACCEPT_CHAR (CPP_MIN_EQ);
1256 }
1257 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1258 {
1259 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1260 result->flags |= DIGRAPH;
1261 }
1262 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1263 {
1264 ACCEPT_CHAR (CPP_OPEN_BRACE);
1265 result->flags |= DIGRAPH;
1266 }
1267 break;
1268
1269 case '>':
1270 result->type = CPP_GREATER;
29401c30 1271 c = get_effective_char (pfile);
0d9f234d
NB
1272 if (c == '=')
1273 ACCEPT_CHAR (CPP_GREATER_EQ);
1274 else if (c == '>')
1275 {
1276 ACCEPT_CHAR (CPP_RSHIFT);
29401c30 1277 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1278 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1279 }
1280 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1281 {
1282 ACCEPT_CHAR (CPP_MAX);
29401c30 1283 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1284 ACCEPT_CHAR (CPP_MAX_EQ);
1285 }
1286 break;
1287
cbcff6df 1288 case '%':
29401c30 1289 lex_percent (pfile, result);
0d9f234d
NB
1290 break;
1291
cbcff6df
NB
1292 case '.':
1293 lex_dot (pfile, result);
0d9f234d 1294 break;
45b966db 1295
0d9f234d
NB
1296 case '+':
1297 result->type = CPP_PLUS;
29401c30 1298 c = get_effective_char (pfile);
0d9f234d
NB
1299 if (c == '=')
1300 ACCEPT_CHAR (CPP_PLUS_EQ);
1301 else if (c == '+')
1302 ACCEPT_CHAR (CPP_PLUS_PLUS);
1303 break;
04e3ec78 1304
0d9f234d
NB
1305 case '-':
1306 result->type = CPP_MINUS;
29401c30 1307 c = get_effective_char (pfile);
0d9f234d
NB
1308 if (c == '>')
1309 {
1310 ACCEPT_CHAR (CPP_DEREF);
1311 if (CPP_OPTION (pfile, cplusplus)
29401c30 1312 && get_effective_char (pfile) == '*')
0d9f234d
NB
1313 ACCEPT_CHAR (CPP_DEREF_STAR);
1314 }
1315 else if (c == '=')
1316 ACCEPT_CHAR (CPP_MINUS_EQ);
1317 else if (c == '-')
1318 ACCEPT_CHAR (CPP_MINUS_MINUS);
1319 break;
45b966db 1320
0d9f234d
NB
1321 case '*':
1322 result->type = CPP_MULT;
29401c30 1323 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1324 ACCEPT_CHAR (CPP_MULT_EQ);
1325 break;
04e3ec78 1326
0d9f234d
NB
1327 case '=':
1328 result->type = CPP_EQ;
29401c30 1329 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1330 ACCEPT_CHAR (CPP_EQ_EQ);
1331 break;
f8f769ea 1332
0d9f234d
NB
1333 case '!':
1334 result->type = CPP_NOT;
29401c30 1335 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1336 ACCEPT_CHAR (CPP_NOT_EQ);
1337 break;
45b966db 1338
0d9f234d
NB
1339 case '&':
1340 result->type = CPP_AND;
29401c30 1341 c = get_effective_char (pfile);
0d9f234d
NB
1342 if (c == '=')
1343 ACCEPT_CHAR (CPP_AND_EQ);
1344 else if (c == '&')
1345 ACCEPT_CHAR (CPP_AND_AND);
1346 break;
1347
1348 case '#':
a949941c 1349 result->type = CPP_HASH;
5fddcffc
NB
1350 if (get_effective_char (pfile) == '#')
1351 ACCEPT_CHAR (CPP_PASTE);
0d9f234d 1352 break;
45b966db 1353
0d9f234d
NB
1354 case '|':
1355 result->type = CPP_OR;
29401c30 1356 c = get_effective_char (pfile);
0d9f234d
NB
1357 if (c == '=')
1358 ACCEPT_CHAR (CPP_OR_EQ);
1359 else if (c == '|')
1360 ACCEPT_CHAR (CPP_OR_OR);
1361 break;
45b966db 1362
0d9f234d
NB
1363 case '^':
1364 result->type = CPP_XOR;
29401c30 1365 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1366 ACCEPT_CHAR (CPP_XOR_EQ);
1367 break;
45b966db 1368
0d9f234d
NB
1369 case ':':
1370 result->type = CPP_COLON;
29401c30 1371 c = get_effective_char (pfile);
0d9f234d
NB
1372 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1373 ACCEPT_CHAR (CPP_SCOPE);
1374 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1375 {
1376 result->flags |= DIGRAPH;
1377 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1378 }
1379 break;
45b966db 1380
0d9f234d
NB
1381 case '~': result->type = CPP_COMPL; break;
1382 case ',': result->type = CPP_COMMA; break;
1383 case '(': result->type = CPP_OPEN_PAREN; break;
1384 case ')': result->type = CPP_CLOSE_PAREN; break;
1385 case '[': result->type = CPP_OPEN_SQUARE; break;
1386 case ']': result->type = CPP_CLOSE_SQUARE; break;
1387 case '{': result->type = CPP_OPEN_BRACE; break;
1388 case '}': result->type = CPP_CLOSE_BRACE; break;
1389 case ';': result->type = CPP_SEMICOLON; break;
1390
cc937581
ZW
1391 /* @ is a punctuator in Objective C. */
1392 case '@': result->type = CPP_ATSIGN; break;
0d9f234d
NB
1393
/* Any other character becomes a CPP_OTHER token carrying the
   character itself in val.c.  */
1394 random_char:
1395 default:
1396 result->type = CPP_OTHER;
6c53ebff 1397 result->val.c = c;
0d9f234d
NB
1398 break;
1399 }
1400}
93c80368
NB
1402/* An upper bound on the number of bytes needed to spell a token,
1403 including preceding whitespace. */
1404unsigned int
1405cpp_token_len (token)
1406 const cpp_token *token;
0d9f234d 1407{
93c80368 1408 unsigned int len;
6d2c2047 1409
93c80368 1410 switch (TOKEN_SPELL (token))
041c3194 1411 {
a28c5035
NB
1412 default: len = 0; break;
1413 case SPELL_STRING: len = token->val.str.len; break;
1414 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
041c3194 1415 }
93c80368
NB
1416 /* 1 for whitespace, 4 for comment delimeters. */
1417 return len + 5;
6d2c2047
ZW
1418}
1419
041c3194 1420/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1421 already contain the enough space to hold the token's spelling.
1422 Returns a pointer to the character after the last character
1423 written. */
93c80368
NB
1424unsigned char *
1425cpp_spell_token (pfile, token, buffer)
041c3194
ZW
1426 cpp_reader *pfile; /* Would be nice to be rid of this... */
1427 const cpp_token *token;
1428 unsigned char *buffer;
1429{
96be6998 1430 switch (TOKEN_SPELL (token))
041c3194
ZW
1431 {
1432 case SPELL_OPERATOR:
1433 {
1434 const unsigned char *spelling;
1435 unsigned char c;
d6d5f795 1436
041c3194 1437 if (token->flags & DIGRAPH)
37b8524c
JDA
1438 spelling
1439 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
92936ecf
ZW
1440 else if (token->flags & NAMED_OP)
1441 goto spell_ident;
041c3194 1442 else
96be6998 1443 spelling = TOKEN_NAME (token);
041c3194
ZW
1444
1445 while ((c = *spelling++) != '\0')
1446 *buffer++ = c;
1447 }
1448 break;
d6d5f795 1449
041c3194 1450 case SPELL_IDENT:
92936ecf 1451 spell_ident:
a28c5035
NB
1452 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1453 buffer += NODE_LEN (token->val.node);
041c3194 1454 break;
d6d5f795 1455
041c3194
ZW
1456 case SPELL_STRING:
1457 {
ba89d661
ZW
1458 int left, right, tag;
1459 switch (token->type)
1460 {
1461 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1462 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
ba89d661
ZW
1463 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1464 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1465 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1466 default: left = '\0'; right = '\0'; tag = '\0'; break;
1467 }
1468 if (tag) *buffer++ = tag;
1469 if (left) *buffer++ = left;
bfb9dc7f
ZW
1470 memcpy (buffer, token->val.str.text, token->val.str.len);
1471 buffer += token->val.str.len;
ba89d661 1472 if (right) *buffer++ = right;
041c3194
ZW
1473 }
1474 break;
d6d5f795 1475
041c3194 1476 case SPELL_CHAR:
6c53ebff 1477 *buffer++ = token->val.c;
041c3194 1478 break;
d6d5f795 1479
041c3194 1480 case SPELL_NONE:
96be6998 1481 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1482 break;
1483 }
d6d5f795 1484
041c3194
ZW
1485 return buffer;
1486}
d6d5f795 1487
93c80368
NB
1488/* Returns a token as a null-terminated string. The string is
1489 temporary, and automatically freed later. Useful for diagnostics. */
1490unsigned char *
1491cpp_token_as_text (pfile, token)
c5a04734 1492 cpp_reader *pfile;
041c3194 1493 const cpp_token *token;
c5a04734 1494{
93c80368 1495 unsigned int len = cpp_token_len (token);
49fe13f6 1496 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
c5a04734 1497
93c80368
NB
1498 end = cpp_spell_token (pfile, token, start);
1499 end[0] = '\0';
c5a04734 1500
93c80368
NB
1501 return start;
1502}
c5a04734 1503
93c80368
NB
1504/* Used by C front ends. Should really move to using cpp_token_as_text. */
1505const char *
1506cpp_type2name (type)
1507 enum cpp_ttype type;
1508{
1509 return (const char *) token_spellings[type].name;
1510}
c5a04734 1511
93c80368
NB
1512/* Writes the spelling of token to FP. Separate from cpp_spell_token
1513 for efficiency - to avoid double-buffering. Also, outputs a space
1514 if PREV_WHITE is flagged. */
1515void
1516cpp_output_token (token, fp)
1517 const cpp_token *token;
1518 FILE *fp;
1519{
1520 if (token->flags & PREV_WHITE)
1521 putc (' ', fp);
d8090680 1522
93c80368 1523 switch (TOKEN_SPELL (token))
c5a04734 1524 {
93c80368
NB
1525 case SPELL_OPERATOR:
1526 {
1527 const unsigned char *spelling;
c5a04734 1528
93c80368 1529 if (token->flags & DIGRAPH)
37b8524c
JDA
1530 spelling
1531 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
93c80368
NB
1532 else if (token->flags & NAMED_OP)
1533 goto spell_ident;
1534 else
1535 spelling = TOKEN_NAME (token);
041c3194 1536
93c80368
NB
1537 ufputs (spelling, fp);
1538 }
1539 break;
041c3194 1540
93c80368
NB
1541 spell_ident:
1542 case SPELL_IDENT:
a28c5035 1543 ufputs (NODE_NAME (token->val.node), fp);
93c80368 1544 break;
041c3194 1545
93c80368
NB
1546 case SPELL_STRING:
1547 {
1548 int left, right, tag;
1549 switch (token->type)
1550 {
1551 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1552 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
93c80368
NB
1553 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1554 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1555 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1556 default: left = '\0'; right = '\0'; tag = '\0'; break;
1557 }
1558 if (tag) putc (tag, fp);
1559 if (left) putc (left, fp);
1560 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1561 if (right) putc (right, fp);
1562 }
1563 break;
c5a04734 1564
93c80368 1565 case SPELL_CHAR:
6c53ebff 1566 putc (token->val.c, fp);
93c80368 1567 break;
c5a04734 1568
93c80368
NB
1569 case SPELL_NONE:
1570 /* An error, most probably. */
1571 break;
041c3194 1572 }
c5a04734
ZW
1573}
1574
93c80368
NB
1575/* Compare two tokens. */
1576int
1577_cpp_equiv_tokens (a, b)
1578 const cpp_token *a, *b;
c5a04734 1579{
93c80368
NB
1580 if (a->type == b->type && a->flags == b->flags)
1581 switch (TOKEN_SPELL (a))
1582 {
1583 default: /* Keep compiler happy. */
1584 case SPELL_OPERATOR:
1585 return 1;
1586 case SPELL_CHAR:
6c53ebff 1587 return a->val.c == b->val.c; /* Character. */
93c80368 1588 case SPELL_NONE:
56051c0a 1589 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
93c80368
NB
1590 case SPELL_IDENT:
1591 return a->val.node == b->val.node;
1592 case SPELL_STRING:
1593 return (a->val.str.len == b->val.str.len
1594 && !memcmp (a->val.str.text, b->val.str.text,
1595 a->val.str.len));
1596 }
c5a04734 1597
041c3194
ZW
1598 return 0;
1599}
1600
041c3194
ZW
1601/* Determine whether two tokens can be pasted together, and if so,
1602 what the resulting token is. Returns CPP_EOF if the tokens cannot
1603 be pasted, or the appropriate type for the merged token if they
1604 can. */
7de4d004 1605enum cpp_ttype
93c80368 1606cpp_can_paste (pfile, token1, token2, digraph)
041c3194
ZW
1607 cpp_reader * pfile;
1608 const cpp_token *token1, *token2;
1609 int* digraph;
c5a04734 1610{
041c3194
ZW
1611 enum cpp_ttype a = token1->type, b = token2->type;
1612 int cxx = CPP_OPTION (pfile, cplusplus);
c5a04734 1613
92936ecf
ZW
1614 /* Treat named operators as if they were ordinary NAMEs. */
1615 if (token1->flags & NAMED_OP)
1616 a = CPP_NAME;
1617 if (token2->flags & NAMED_OP)
1618 b = CPP_NAME;
1619
37b8524c
JDA
1620 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1621 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
c5a04734 1622
041c3194 1623 switch (a)
c5a04734 1624 {
041c3194
ZW
1625 case CPP_GREATER:
1626 if (b == a) return CPP_RSHIFT;
1627 if (b == CPP_QUERY && cxx) return CPP_MAX;
1628 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1629 break;
1630 case CPP_LESS:
1631 if (b == a) return CPP_LSHIFT;
1632 if (b == CPP_QUERY && cxx) return CPP_MIN;
1633 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
9b55f29a
NB
1634 if (CPP_OPTION (pfile, digraphs))
1635 {
1636 if (b == CPP_COLON)
1637 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1638 if (b == CPP_MOD)
1639 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1640 }
041c3194 1641 break;
c5a04734 1642
041c3194
ZW
1643 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1644 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1645 case CPP_OR: if (b == a) return CPP_OR_OR; break;
c5a04734 1646
041c3194
ZW
1647 case CPP_MINUS:
1648 if (b == a) return CPP_MINUS_MINUS;
1649 if (b == CPP_GREATER) return CPP_DEREF;
1650 break;
1651 case CPP_COLON:
1652 if (b == a && cxx) return CPP_SCOPE;
9b55f29a 1653 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
041c3194
ZW
1654 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1655 break;
1656
1657 case CPP_MOD:
9b55f29a
NB
1658 if (CPP_OPTION (pfile, digraphs))
1659 {
1660 if (b == CPP_GREATER)
1661 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1662 if (b == CPP_COLON)
1663 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1664 }
041c3194
ZW
1665 break;
1666 case CPP_DEREF:
1667 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1668 break;
1669 case CPP_DOT:
1670 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1671 if (b == CPP_NUMBER) return CPP_NUMBER;
1672 break;
1673
1674 case CPP_HASH:
1675 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1676 /* %:%: digraph */
1677 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1678 break;
1679
1680 case CPP_NAME:
1681 if (b == CPP_NAME) return CPP_NAME;
1682 if (b == CPP_NUMBER
93c80368 1683 && name_p (pfile, &token2->val.str)) return CPP_NAME;
041c3194 1684 if (b == CPP_CHAR
93c80368 1685 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
041c3194 1686 if (b == CPP_STRING
93c80368 1687 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
041c3194
ZW
1688 break;
1689
1690 case CPP_NUMBER:
1691 if (b == CPP_NUMBER) return CPP_NUMBER;
1692 if (b == CPP_NAME) return CPP_NUMBER;
1693 if (b == CPP_DOT) return CPP_NUMBER;
1694 /* Numbers cannot have length zero, so this is safe. */
1695 if ((b == CPP_PLUS || b == CPP_MINUS)
bfb9dc7f 1696 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
041c3194
ZW
1697 return CPP_NUMBER;
1698 break;
1699
1700 default:
1701 break;
c5a04734
ZW
1702 }
1703
041c3194
ZW
1704 return CPP_EOF;
1705}
1706
93c80368
NB
1707/* Returns nonzero if a space should be inserted to avoid an
1708 accidental token paste for output. For simplicity, it is
1709 conservative, and occasionally advises a space where one is not
1710 needed, e.g. "." and ".2". */
041c3194 1711
93c80368
NB
1712int
1713cpp_avoid_paste (pfile, token1, token2)
c5a04734 1714 cpp_reader *pfile;
93c80368 1715 const cpp_token *token1, *token2;
c5a04734 1716{
93c80368
NB
1717 enum cpp_ttype a = token1->type, b = token2->type;
1718 cppchar_t c;
c5a04734 1719
93c80368
NB
1720 if (token1->flags & NAMED_OP)
1721 a = CPP_NAME;
1722 if (token2->flags & NAMED_OP)
1723 b = CPP_NAME;
c5a04734 1724
93c80368
NB
1725 c = EOF;
1726 if (token2->flags & DIGRAPH)
37b8524c 1727 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
1728 else if (token_spellings[b].category == SPELL_OPERATOR)
1729 c = token_spellings[b].name[0];
c5a04734 1730
93c80368 1731 /* Quickly get everything that can paste with an '='. */
37b8524c 1732 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 1733 return 1;
c5a04734 1734
93c80368 1735 switch (a)
c5a04734 1736 {
93c80368
NB
1737 case CPP_GREATER: return c == '>' || c == '?';
1738 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1739 case CPP_PLUS: return c == '+';
1740 case CPP_MINUS: return c == '-' || c == '>';
1741 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1742 case CPP_MOD: return c == ':' || c == '>';
1743 case CPP_AND: return c == '&';
1744 case CPP_OR: return c == '|';
1745 case CPP_COLON: return c == ':' || c == '>';
1746 case CPP_DEREF: return c == '*';
26ec42ee 1747 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
1748 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1749 case CPP_NAME: return ((b == CPP_NUMBER
1750 && name_p (pfile, &token2->val.str))
1751 || b == CPP_NAME
1752 || b == CPP_CHAR || b == CPP_STRING); /* L */
1753 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1754 || c == '.' || c == '+' || c == '-');
1755 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
6c53ebff 1756 && token1->val.c == '@'
93c80368
NB
1757 && (b == CPP_NAME || b == CPP_STRING));
1758 default: break;
c5a04734 1759 }
c5a04734 1760
417f3e3a 1761 return 0;
c5a04734
ZW
1762}
1763
93c80368
NB
1764/* Output all the remaining tokens on the current line, and a newline
1765 character, to FP. Leading whitespace is removed. */
c5a04734 1766void
93c80368 1767cpp_output_line (pfile, fp)
c5a04734 1768 cpp_reader *pfile;
93c80368 1769 FILE *fp;
c5a04734 1770{
93c80368 1771 cpp_token token;
96be6998 1772
7f2f1a66 1773 cpp_get_token (pfile, &token);
93c80368
NB
1774 token.flags &= ~PREV_WHITE;
1775 while (token.type != CPP_EOF)
96be6998 1776 {
93c80368 1777 cpp_output_token (&token, fp);
7f2f1a66 1778 cpp_get_token (pfile, &token);
96be6998
ZW
1779 }
1780
93c80368 1781 putc ('\n', fp);
041c3194 1782}
c5a04734 1783
c8a96070
NB
/* Returns the numeric value (0-15) of the hexadecimal digit C, which
   may be in either letter case.  Aborts if C is not a hexadecimal
   digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  unsigned int offset;

  if (c >= '0' && c <= '9')
    offset = '0';
  else if (c >= 'A' && c <= 'F')
    offset = 'A' - 10;
  else if (c >= 'a' && c <= 'f')
    offset = 'a' - 10;
  else
    abort ();

  return c - offset;
}
1797
62729350
NB
1798/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1799 failure if cpplib is not parsing C++ or C99. Such failure is
1800 silent, and no variables are updated. Otherwise returns 0, and
1801 warns if -Wtraditional.
c8a96070
NB
1802
1803 [lex.charset]: The character designated by the universal character
1804 name \UNNNNNNNN is that character whose character short name in
1805 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1806 universal character name \uNNNN is that character whose character
1807 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1808 for a universal character name is less than 0x20 or in the range
1809 0x7F-0x9F (inclusive), or if the universal character name
1810 designates a character in the basic source character set, then the
1811 program is ill-formed.
1812
1813 We assume that wchar_t is Unicode, so we don't need to do any
62729350 1814 mapping. Is this ever wrong?
c8a96070 1815
62729350
NB
1816 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1817 LIMIT is the end of the string or charconst. PSTR is updated to
1818 point after the UCS on return, and the UCS is written into PC. */
1819
1820static int
1821maybe_read_ucs (pfile, pstr, limit, pc)
c8a96070
NB
1822 cpp_reader *pfile;
1823 const unsigned char **pstr;
1824 const unsigned char *limit;
62729350 1825 unsigned int *pc;
c8a96070
NB
1826{
1827 const unsigned char *p = *pstr;
62729350
NB
1828 unsigned int code = 0;
1829 unsigned int c = *pc, length;
1830
1831 /* Only attempt to interpret a UCS for C++ and C99. */
1832 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1833 return 1;
c8a96070 1834
62729350
NB
1835 if (CPP_WTRADITIONAL (pfile))
1836 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
c8a96070 1837
f8710242
NB
1838 length = (c == 'u' ? 4: 8);
1839
1840 if ((size_t) (limit - p) < length)
1841 {
1842 cpp_error (pfile, "incomplete universal-character-name");
1843 /* Skip to the end to avoid more diagnostics. */
1844 p = limit;
1845 }
1846 else
1847 {
1848 for (; length; length--, p++)
c8a96070 1849 {
f8710242
NB
1850 c = *p;
1851 if (ISXDIGIT (c))
1852 code = (code << 4) + hex_digit_value (c);
1853 else
1854 {
1855 cpp_error (pfile,
1856 "non-hex digit '%c' in universal-character-name", c);
1857 /* We shouldn't skip in case there are multibyte chars. */
1858 break;
1859 }
c8a96070 1860 }
c8a96070
NB
1861 }
1862
1863#ifdef TARGET_EBCDIC
1864 cpp_error (pfile, "universal-character-name on EBCDIC target");
1865 code = 0x3f; /* EBCDIC invalid character */
1866#else
f8710242
NB
1867 /* True extended characters are OK. */
1868 if (code >= 0xa0
1869 && !(code & 0x80000000)
1870 && !(code >= 0xD800 && code <= 0xDFFF))
1871 ;
1872 /* The standard permits $, @ and ` to be specified as UCNs. We use
1873 hex escapes so that this also works with EBCDIC hosts. */
1874 else if (code == 0x24 || code == 0x40 || code == 0x60)
1875 ;
1876 /* Don't give another error if one occurred above. */
1877 else if (length == 0)
1878 cpp_error (pfile, "universal-character-name out of range");
c8a96070
NB
1879#endif
1880
1881 *pstr = p;
62729350
NB
1882 *pc = code;
1883 return 0;
c8a96070
NB
1884}
1885
1886/* Interpret an escape sequence, and return its value. PSTR points to
1887 the input pointer, which is just after the backslash. LIMIT is how
62729350
NB
1888 much text we have. MASK is a bitmask for the precision for the
1889 destination type (char or wchar_t). TRADITIONAL, if true, does not
1890 interpret escapes that did not exist in traditional C.
c8a96070 1891
62729350
NB
1892 Handles all relevant diagnostics. */
1893
1894unsigned int
1895cpp_parse_escape (pfile, pstr, limit, mask, traditional)
c8a96070
NB
1896 cpp_reader *pfile;
1897 const unsigned char **pstr;
1898 const unsigned char *limit;
62729350 1899 unsigned HOST_WIDE_INT mask;
c8a96070
NB
1900 int traditional;
1901{
1902 int unknown = 0;
1903 const unsigned char *str = *pstr;
1904 unsigned int c = *str++;
1905
1906 switch (c)
1907 {
1908 case '\\': case '\'': case '"': case '?': break;
1909 case 'b': c = TARGET_BS; break;
1910 case 'f': c = TARGET_FF; break;
1911 case 'n': c = TARGET_NEWLINE; break;
1912 case 'r': c = TARGET_CR; break;
1913 case 't': c = TARGET_TAB; break;
1914 case 'v': c = TARGET_VT; break;
1915
1916 case '(': case '{': case '[': case '%':
1917 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1918 '\%' is used to prevent SCCS from getting confused. */
1919 unknown = CPP_PEDANTIC (pfile);
1920 break;
1921
1922 case 'a':
1923 if (CPP_WTRADITIONAL (pfile))
1924 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1925 if (!traditional)
1926 c = TARGET_BELL;
1927 break;
1928
1929 case 'e': case 'E':
1930 if (CPP_PEDANTIC (pfile))
1931 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1932 c = TARGET_ESC;
1933 break;
1934
c8a96070 1935 case 'u': case 'U':
62729350 1936 unknown = maybe_read_ucs (pfile, &str, limit, &c);
c8a96070
NB
1937 break;
1938
1939 case 'x':
1940 if (CPP_WTRADITIONAL (pfile))
1941 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1942
1943 if (!traditional)
1944 {
1945 unsigned int i = 0, overflow = 0;
1946 int digits_found = 0;
1947
1948 while (str < limit)
1949 {
1950 c = *str;
1951 if (! ISXDIGIT (c))
1952 break;
1953 str++;
1954 overflow |= i ^ (i << 4 >> 4);
1955 i = (i << 4) + hex_digit_value (c);
1956 digits_found = 1;
1957 }
1958
1959 if (!digits_found)
1960 cpp_error (pfile, "\\x used with no following hex digits");
1961
1962 if (overflow | (i != (i & mask)))
1963 {
1964 cpp_pedwarn (pfile, "hex escape sequence out of range");
1965 i &= mask;
1966 }
1967 c = i;
1968 }
1969 break;
1970
1971 case '0': case '1': case '2': case '3':
1972 case '4': case '5': case '6': case '7':
1973 {
1974 unsigned int i = c - '0';
1975 int count = 0;
1976
1977 while (str < limit && ++count < 3)
1978 {
1979 c = *str;
1980 if (c < '0' || c > '7')
1981 break;
1982 str++;
1983 i = (i << 3) + c - '0';
1984 }
1985
1986 if (i != (i & mask))
1987 {
1988 cpp_pedwarn (pfile, "octal escape sequence out of range");
1989 i &= mask;
1990 }
1991 c = i;
1992 }
1993 break;
1994
1995 default:
1996 unknown = 1;
1997 break;
1998 }
1999
2000 if (unknown)
2001 {
2002 if (ISGRAPH (c))
2003 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
2004 else
2005 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
2006 }
2007
62729350
NB
2008 if (c > mask)
2009 cpp_pedwarn (pfile, "escape sequence out of range for character");
2010
c8a96070
NB
2011 *pstr = str;
2012 return c;
2013}
2014
/* When the MAX_ variants of the character type widths have not been
   defined, fall back to the ordinary CHAR_TYPE_SIZE and
   WCHAR_TYPE_SIZE.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
# define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
# define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
2022
2023/* Interpret a (possibly wide) character constant in TOKEN.
2024 WARN_MULTI warns about multi-character charconsts, if not
2025 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
2026 that did not exist in traditional C. PCHARS_SEEN points to a
2027 variable that is filled in with the number of characters seen. */
2028HOST_WIDE_INT
2029cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
2030 cpp_reader *pfile;
2031 const cpp_token *token;
2032 int warn_multi;
2033 int traditional;
2034 unsigned int *pchars_seen;
2035{
2036 const unsigned char *str = token->val.str.text;
2037 const unsigned char *limit = str + token->val.str.len;
2038 unsigned int chars_seen = 0;
2039 unsigned int width, max_chars, c;
2a967f3d
NB
2040 unsigned HOST_WIDE_INT mask;
2041 HOST_WIDE_INT result = 0;
c8a96070
NB
2042
2043#ifdef MULTIBYTE_CHARS
2044 (void) local_mbtowc (NULL, NULL, 0);
2045#endif
2046
2047 /* Width in bits. */
2048 if (token->type == CPP_CHAR)
2049 width = MAX_CHAR_TYPE_SIZE;
2050 else
2051 width = MAX_WCHAR_TYPE_SIZE;
2052
2053 if (width < HOST_BITS_PER_WIDE_INT)
2054 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
2055 else
2056 mask = ~0;
2057 max_chars = HOST_BITS_PER_WIDE_INT / width;
2058
2059 while (str < limit)
2060 {
2061#ifdef MULTIBYTE_CHARS
2062 wchar_t wc;
2063 int char_len;
2064
2065 char_len = local_mbtowc (&wc, str, limit - str);
2066 if (char_len == -1)
2067 {
2068 cpp_warning (pfile, "ignoring invalid multibyte character");
2069 c = *str++;
2070 }
2071 else
2072 {
2073 str += char_len;
2074 c = wc;
2075 }
2076#else
2077 c = *str++;
2078#endif
2079
2080 if (c == '\\')
62729350 2081 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
c8a96070
NB
2082
2083#ifdef MAP_CHARACTER
2084 if (ISPRINT (c))
2085 c = MAP_CHARACTER (c);
2086#endif
2087
2088 /* Merge character into result; ignore excess chars. */
2089 if (++chars_seen <= max_chars)
2090 {
2091 if (width < HOST_BITS_PER_WIDE_INT)
2092 result = (result << width) | (c & mask);
2093 else
2094 result = c;
2095 }
2096 }
2097
2098 if (chars_seen == 0)
2099 cpp_error (pfile, "empty character constant");
2100 else if (chars_seen > max_chars)
2101 {
2102 chars_seen = max_chars;
f8710242 2103 cpp_warning (pfile, "character constant too long");
c8a96070
NB
2104 }
2105 else if (chars_seen > 1 && !traditional && warn_multi)
2106 cpp_warning (pfile, "multi-character character constant");
2107
2108 /* If char type is signed, sign-extend the constant. The
2109 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
2110 if (token->type == CPP_CHAR && chars_seen)
2111 {
2112 unsigned int nbits = chars_seen * width;
2113 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2114
2115 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2116 || ((result >> (nbits - 1)) & 1) == 0)
2117 result &= mask;
2118 else
2119 result |= ~mask;
2120 }
2121
2122 *pchars_seen = chars_seen;
2123 return result;
2124}
2125
93c80368 2126/* Memory pools. */
417f3e3a 2127
/* Layout probe, never instantiated.  The union follows a single char,
   so its offset within the structure is the padding the compiler
   inserts to align a pointer or a double.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Alignment used for a pool when the caller does not ask for a
   specific one.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
417f3e3a 2139
93c80368
NB
2140static int
2141chunk_suitable (pool, chunk, size)
2142 cpp_pool *pool;
2143 cpp_chunk *chunk;
2144 unsigned int size;
2145{
2146 /* Being at least twice SIZE means we can use memcpy in
2147 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2148 anyway. */
2149 return (chunk && pool->locked != chunk
2150 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
041c3194 2151}
c5a04734 2152
93c80368
NB
2153/* Returns the end of the new pool. PTR points to a char in the old
2154 pool, and is updated to point to the same char in the new pool. */
2155unsigned char *
2156_cpp_next_chunk (pool, len, ptr)
2157 cpp_pool *pool;
2158 unsigned int len;
2159 unsigned char **ptr;
041c3194 2160{
93c80368 2161 cpp_chunk *chunk = pool->cur->next;
c5a04734 2162
93c80368
NB
2163 /* LEN is the minimum size we want in the new pool. */
2164 len += POOL_ROOM (pool);
2165 if (! chunk_suitable (pool, chunk, len))
041c3194 2166 {
93c80368 2167 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
c5a04734 2168
93c80368
NB
2169 chunk->next = pool->cur->next;
2170 pool->cur->next = chunk;
c5a04734
ZW
2171 }
2172
93c80368
NB
2173 /* Update the pointer before changing chunk's front. */
2174 if (ptr)
2175 *ptr += chunk->base - POOL_FRONT (pool);
041c3194 2176
93c80368
NB
2177 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2178 chunk->front = chunk->base;
041c3194 2179
93c80368
NB
2180 pool->cur = chunk;
2181 return POOL_LIMIT (pool);
c5a04734
ZW
2182}
2183
93c80368
NB
2184static cpp_chunk *
2185new_chunk (size)
2186 unsigned int size;
041c3194 2187{
93c80368
NB
2188 unsigned char *base;
2189 cpp_chunk *result;
3fef5b2b 2190
269592a8 2191 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
93c80368
NB
2192 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2193 /* Put the chunk descriptor at the end. Then chunk overruns will
2194 cause obvious chaos. */
2195 result = (cpp_chunk *) (base + size);
2196 result->base = base;
2197 result->front = base;
2198 result->limit = base + size;
2199 result->next = 0;
417f3e3a 2200
93c80368 2201 return result;
041c3194
ZW
2202}
2203
93c80368
NB
2204void
2205_cpp_init_pool (pool, size, align, temp)
2206 cpp_pool *pool;
2207 unsigned int size, align, temp;
2208{
2209 if (align == 0)
2210 align = DEFAULT_ALIGNMENT;
2211 if (align & (align - 1))
2212 abort ();
2213 pool->align = align;
bef985f3
NB
2214 pool->first = new_chunk (size);
2215 pool->cur = pool->first;
93c80368
NB
2216 pool->locked = 0;
2217 pool->locks = 0;
2218 if (temp)
2219 pool->cur->next = pool->cur;
041c3194
ZW
2220}
2221
93c80368
NB
2222void
2223_cpp_lock_pool (pool)
2224 cpp_pool *pool;
041c3194 2225{
93c80368
NB
2226 if (pool->locks++ == 0)
2227 pool->locked = pool->cur;
041c3194
ZW
2228}
2229
93c80368
NB
2230void
2231_cpp_unlock_pool (pool)
2232 cpp_pool *pool;
041c3194 2233{
93c80368
NB
2234 if (--pool->locks == 0)
2235 pool->locked = 0;
041c3194
ZW
2236}
2237
93c80368
NB
2238void
2239_cpp_free_pool (pool)
2240 cpp_pool *pool;
3fef5b2b 2241{
bef985f3 2242 cpp_chunk *chunk = pool->first, *next;
3fef5b2b 2243
93c80368 2244 do
3fef5b2b 2245 {
93c80368
NB
2246 next = chunk->next;
2247 free (chunk->base);
2248 chunk = next;
3fef5b2b 2249 }
bef985f3 2250 while (chunk && chunk != pool->first);
041c3194 2251}
041c3194 2252
93c80368
NB
2253/* Reserve LEN bytes from a memory pool. */
2254unsigned char *
2255_cpp_pool_reserve (pool, len)
2256 cpp_pool *pool;
2257 unsigned int len;
041c3194 2258{
269592a8 2259 len = POOL_ALIGN (len, pool->align);
93c80368
NB
2260 if (len > (unsigned int) POOL_ROOM (pool))
2261 _cpp_next_chunk (pool, len, 0);
041c3194 2262
93c80368 2263 return POOL_FRONT (pool);
c5a04734
ZW
2264}
2265
93c80368
NB
2266/* Allocate LEN bytes from a memory pool. */
2267unsigned char *
2268_cpp_pool_alloc (pool, len)
2269 cpp_pool *pool;
2270 unsigned int len;
041c3194 2271{
93c80368 2272 unsigned char *result = _cpp_pool_reserve (pool, len);
417f3e3a 2273
93c80368
NB
2274 POOL_COMMIT (pool, len);
2275 return result;
041c3194 2276}
This page took 0.725644 seconds and 5 git commands to generate.