]> gcc.gnu.org Git - gcc.git/blame - gcc/cpplex.c
spew.c (read_token): Call yyerror on all unexpected tokens.
[gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
93c80368
NB
23/* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
27
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
041c3194 36
45b966db
ZW
37#include "config.h"
38#include "system.h"
45b966db
ZW
39#include "cpplib.h"
40#include "cpphash.h"
41
c8a96070
NB
42/* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
45#ifdef CROSS_COMPILE
46#undef MULTIBYTE_CHARS
47#endif
48
49#ifdef MULTIBYTE_CHARS
50#include "mbchar.h"
51#include <locale.h>
52#endif
53
93c80368
NB
/* Classifies how a token's spelling is obtained.  Tokens classed
   SPELL_STRING store their spelling in the token list, and its length
   in the token->val.name.len.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR     = 1,
  SPELL_IDENT    = 2,
  SPELL_STRING   = 3,
  SPELL_NONE     = 4
};
64
93c80368 65struct token_spelling
f9a0e96c 66{
93c80368
NB
67 enum spell_type category;
68 const unsigned char *name;
f9a0e96c
ZW
69};
70
93c80368
NB
71const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
72 U":>", U"<%", U"%>"};
73
74#define OP(e, s) { SPELL_OPERATOR, U s },
75#define TK(e, s) { s, U STRINGX (e) },
76const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
77#undef OP
78#undef TK
79
80#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
f2d5f0cc 82
0d9f234d
NB
83static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
84static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
85static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
86
041c3194 87static int skip_block_comment PARAMS ((cpp_reader *));
cbcff6df 88static int skip_line_comment PARAMS ((cpp_reader *));
0d9f234d
NB
89static void adjust_column PARAMS ((cpp_reader *));
90static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
91static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
93c80368
NB
92static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
93static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
0d9f234d 94static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
93c80368 95static void unterminated PARAMS ((cpp_reader *, int));
0d9f234d
NB
96static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
97static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
cbcff6df
NB
98static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
99static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
93c80368 100static int name_p PARAMS ((cpp_reader *, const cpp_string *));
c8a96070
NB
101static unsigned int parse_escape PARAMS ((cpp_reader *, const unsigned char **,
102 const unsigned char *, HOST_WIDE_INT,
103 int));
104static unsigned int read_ucs PARAMS ((cpp_reader *, const unsigned char **,
105 const unsigned char *, unsigned int));
f617b8e2 106
93c80368
NB
107static cpp_chunk *new_chunk PARAMS ((unsigned int));
108static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
c8a96070 109static unsigned int hex_digit_value PARAMS ((unsigned int));
15dad1d9 110
041c3194 111/* Utility routine:
9e62c811 112
bfb9dc7f
ZW
113 Compares, the token TOKEN to the NUL-terminated string STRING.
114 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
15dad1d9 115
041c3194 116int
bfb9dc7f
ZW
117cpp_ideq (token, string)
118 const cpp_token *token;
041c3194
ZW
119 const char *string;
120{
bfb9dc7f 121 if (token->type != CPP_NAME)
041c3194 122 return 0;
bfb9dc7f 123
93c80368 124 return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
15dad1d9 125}
1368ee70 126
0d9f234d
NB
127/* Call when meeting a newline. Returns the character after the newline
128 (or carriage-return newline combination), or EOF. */
129static cppchar_t
130handle_newline (buffer, newline_char)
131 cpp_buffer *buffer;
132 cppchar_t newline_char;
133{
134 cppchar_t next = EOF;
135
136 buffer->col_adjust = 0;
137 buffer->lineno++;
138 buffer->line_base = buffer->cur;
139
140 /* Handle CR-LF and LF-CR combinations, get the next character. */
141 if (buffer->cur < buffer->rlimit)
142 {
143 next = *buffer->cur++;
144 if (next + newline_char == '\r' + '\n')
145 {
146 buffer->line_base = buffer->cur;
147 if (buffer->cur < buffer->rlimit)
148 next = *buffer->cur++;
149 else
150 next = EOF;
151 }
152 }
153
154 buffer->read_ahead = next;
155 return next;
156}
157
158/* Subroutine of skip_escaped_newlines; called when a trigraph is
159 encountered. It warns if necessary, and returns true if the
160 trigraph should be honoured. FROM_CHAR is the third character of a
161 trigraph, and presumed to be the previous character for position
162 reporting. */
45b966db 163static int
0d9f234d 164trigraph_ok (pfile, from_char)
45b966db 165 cpp_reader *pfile;
0d9f234d 166 cppchar_t from_char;
45b966db 167{
041c3194
ZW
168 int accept = CPP_OPTION (pfile, trigraphs);
169
cbcff6df
NB
170 /* Don't warn about trigraphs in comments. */
171 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
45b966db 172 {
0d9f234d 173 cpp_buffer *buffer = pfile->buffer;
041c3194 174 if (accept)
0d9f234d 175 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
041c3194 176 "trigraph ??%c converted to %c",
0d9f234d
NB
177 (int) from_char,
178 (int) _cpp_trigraph_map[from_char]);
4a5b68a2
NB
179 else if (buffer->cur != buffer->last_Wtrigraphs)
180 {
181 buffer->last_Wtrigraphs = buffer->cur;
182 cpp_warning_with_line (pfile, buffer->lineno,
183 CPP_BUF_COL (buffer) - 2,
184 "trigraph ??%c ignored", (int) from_char);
185 }
45b966db 186 }
0d9f234d 187
041c3194 188 return accept;
45b966db
ZW
189}
190
0d9f234d
NB
/* Give the token under construction type T and discard the look-ahead
   character.  Expects local variables named `buffer' and `result' to
   be in scope at the point of use.  */
#define ACCEPT_CHAR(t) \
  do { result->type = t; buffer->read_ahead = EOF; } while (0)

/* Remember and restore the read position of `buffer' through a local
   variable named `saved_cur'.  When we move to multibyte character
   sets, these should also save and restore the state of the multibyte
   conversion library, which probably means saving and restoring a
   "cookie".  In the case of glibc that is an 8-byte structure, so it
   is not a high overhead operation, and in any case it is out of the
   fast path.  */
#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
202
203/* Skips any escaped newlines introduced by NEXT, which is either a
204 '?' or a '\\'. Returns the next character, which will also have
a5c3cccd
NB
205 been placed in buffer->read_ahead. This routine performs
206 preprocessing stages 1 and 2 of the ISO C standard. */
0d9f234d
NB
207static cppchar_t
208skip_escaped_newlines (buffer, next)
209 cpp_buffer *buffer;
210 cppchar_t next;
45b966db 211{
a5c3cccd
NB
212 /* Only do this if we apply stages 1 and 2. */
213 if (!buffer->from_stage3)
041c3194 214 {
a5c3cccd
NB
215 cppchar_t next1;
216 const unsigned char *saved_cur;
217 int space;
218
219 do
0d9f234d 220 {
a5c3cccd
NB
221 if (buffer->cur == buffer->rlimit)
222 break;
223
224 SAVE_STATE ();
225 if (next == '?')
0d9f234d 226 {
a5c3cccd
NB
227 next1 = *buffer->cur++;
228 if (next1 != '?' || buffer->cur == buffer->rlimit)
229 {
230 RESTORE_STATE ();
231 break;
232 }
233
234 next1 = *buffer->cur++;
235 if (!_cpp_trigraph_map[next1]
236 || !trigraph_ok (buffer->pfile, next1))
237 {
238 RESTORE_STATE ();
239 break;
240 }
241
242 /* We have a full trigraph here. */
243 next = _cpp_trigraph_map[next1];
244 if (next != '\\' || buffer->cur == buffer->rlimit)
245 break;
246 SAVE_STATE ();
247 }
248
249 /* We have a backslash, and room for at least one more character. */
250 space = 0;
251 do
252 {
253 next1 = *buffer->cur++;
254 if (!is_nvspace (next1))
255 break;
256 space = 1;
0d9f234d 257 }
a5c3cccd 258 while (buffer->cur < buffer->rlimit);
041c3194 259
a5c3cccd 260 if (!is_vspace (next1))
0d9f234d
NB
261 {
262 RESTORE_STATE ();
263 break;
264 }
45b966db 265
c673abe4 266 if (space && !buffer->pfile->state.lexing_comment)
a5c3cccd
NB
267 cpp_warning (buffer->pfile,
268 "backslash and newline separated by space");
0d9f234d 269
a5c3cccd
NB
270 next = handle_newline (buffer, next1);
271 if (next == EOF)
272 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
0d9f234d 273 }
a5c3cccd 274 while (next == '\\' || next == '?');
041c3194 275 }
45b966db 276
0d9f234d
NB
277 buffer->read_ahead = next;
278 return next;
45b966db
ZW
279}
280
0d9f234d
NB
281/* Obtain the next character, after trigraph conversion and skipping
282 an arbitrary string of escaped newlines. The common case of no
283 trigraphs or escaped newlines falls through quickly. */
284static cppchar_t
285get_effective_char (buffer)
286 cpp_buffer *buffer;
64aaf407 287{
0d9f234d
NB
288 cppchar_t next = EOF;
289
290 if (buffer->cur < buffer->rlimit)
291 {
292 next = *buffer->cur++;
293
294 /* '?' can introduce trigraphs (and therefore backslash); '\\'
295 can introduce escaped newlines, which we want to skip, or
296 UCNs, which, depending upon lexer state, we will handle in
297 the future. */
298 if (next == '?' || next == '\\')
299 next = skip_escaped_newlines (buffer, next);
300 }
301
302 buffer->read_ahead = next;
303 return next;
64aaf407
NB
304}
305
0d9f234d
NB
306/* Skip a C-style block comment. We find the end of the comment by
307 seeing if an asterisk is before every '/' we encounter. Returns
308 non-zero if comment terminated by EOF, zero otherwise. */
041c3194
ZW
309static int
310skip_block_comment (pfile)
45b966db
ZW
311 cpp_reader *pfile;
312{
041c3194 313 cpp_buffer *buffer = pfile->buffer;
d8090680 314 cppchar_t c = EOF, prevc = EOF;
0d9f234d 315
cbcff6df 316 pfile->state.lexing_comment = 1;
0d9f234d 317 while (buffer->cur != buffer->rlimit)
45b966db 318 {
0d9f234d
NB
319 prevc = c, c = *buffer->cur++;
320
321 next_char:
322 /* FIXME: For speed, create a new character class of characters
93c80368 323 of interest inside block comments. */
0d9f234d
NB
324 if (c == '?' || c == '\\')
325 c = skip_escaped_newlines (buffer, c);
041c3194 326
0d9f234d
NB
327 /* People like decorating comments with '*', so check for '/'
328 instead for efficiency. */
041c3194 329 if (c == '/')
45b966db 330 {
0d9f234d
NB
331 if (prevc == '*')
332 break;
041c3194 333
0d9f234d
NB
334 /* Warn about potential nested comments, but not if the '/'
335 comes immediately before the true comment delimeter.
041c3194 336 Don't bother to get it right across escaped newlines. */
0d9f234d
NB
337 if (CPP_OPTION (pfile, warn_comments)
338 && buffer->cur != buffer->rlimit)
45b966db 339 {
0d9f234d
NB
340 prevc = c, c = *buffer->cur++;
341 if (c == '*' && buffer->cur != buffer->rlimit)
342 {
343 prevc = c, c = *buffer->cur++;
344 if (c != '/')
345 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
346 CPP_BUF_COL (buffer),
347 "\"/*\" within comment");
348 }
349 goto next_char;
45b966db 350 }
45b966db 351 }
91fcd158 352 else if (is_vspace (c))
45b966db 353 {
0d9f234d
NB
354 prevc = c, c = handle_newline (buffer, c);
355 goto next_char;
45b966db 356 }
52fadca8 357 else if (c == '\t')
0d9f234d 358 adjust_column (pfile);
45b966db 359 }
041c3194 360
cbcff6df 361 pfile->state.lexing_comment = 0;
0d9f234d
NB
362 buffer->read_ahead = EOF;
363 return c != '/' || prevc != '*';
45b966db
ZW
364}
365
f9a0e96c 366/* Skip a C++ line comment. Handles escaped newlines. Returns
0d9f234d
NB
367 non-zero if a multiline comment. The following new line, if any,
368 is left in buffer->read_ahead. */
041c3194 369static int
cbcff6df
NB
370skip_line_comment (pfile)
371 cpp_reader *pfile;
45b966db 372{
cbcff6df 373 cpp_buffer *buffer = pfile->buffer;
0d9f234d
NB
374 unsigned int orig_lineno = buffer->lineno;
375 cppchar_t c;
041c3194 376
cbcff6df 377 pfile->state.lexing_comment = 1;
0d9f234d 378 do
041c3194 379 {
0d9f234d
NB
380 c = EOF;
381 if (buffer->cur == buffer->rlimit)
382 break;
041c3194 383
0d9f234d
NB
384 c = *buffer->cur++;
385 if (c == '?' || c == '\\')
386 c = skip_escaped_newlines (buffer, c);
041c3194 387 }
0d9f234d 388 while (!is_vspace (c));
45b966db 389
cbcff6df 390 pfile->state.lexing_comment = 0;
0d9f234d
NB
391 buffer->read_ahead = c; /* Leave any newline for caller. */
392 return orig_lineno != buffer->lineno;
041c3194 393}
45b966db 394
0d9f234d
NB
395/* pfile->buffer->cur is one beyond the \t character. Update
396 col_adjust so we track the column correctly. */
52fadca8 397static void
0d9f234d 398adjust_column (pfile)
52fadca8 399 cpp_reader *pfile;
52fadca8 400{
0d9f234d
NB
401 cpp_buffer *buffer = pfile->buffer;
402 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
52fadca8
NB
403
404 /* Round it up to multiple of the tabstop, but subtract 1 since the
405 tab itself occupies a character position. */
0d9f234d
NB
406 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
407 - col % CPP_OPTION (pfile, tabstop)) - 1;
52fadca8
NB
408}
409
0d9f234d
NB
410/* Skips whitespace, saving the next non-whitespace character.
411 Adjusts pfile->col_adjust to account for tabs. Without this,
412 tokens might be assigned an incorrect column. */
041c3194 413static void
0d9f234d 414skip_whitespace (pfile, c)
041c3194 415 cpp_reader *pfile;
0d9f234d 416 cppchar_t c;
041c3194
ZW
417{
418 cpp_buffer *buffer = pfile->buffer;
0d9f234d 419 unsigned int warned = 0;
45b966db 420
0d9f234d 421 do
041c3194 422 {
91fcd158
NB
423 /* Horizontal space always OK. */
424 if (c == ' ')
0d9f234d 425 ;
91fcd158 426 else if (c == '\t')
0d9f234d
NB
427 adjust_column (pfile);
428 /* Just \f \v or \0 left. */
91fcd158 429 else if (c == '\0')
041c3194 430 {
91fcd158 431 if (!warned)
0d9f234d
NB
432 {
433 cpp_warning (pfile, "null character(s) ignored");
434 warned = 1;
435 }
45b966db 436 }
93c80368 437 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
91fcd158
NB
438 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
439 CPP_BUF_COL (buffer),
440 "%s in preprocessing directive",
441 c == '\f' ? "form feed" : "vertical tab");
0d9f234d
NB
442
443 c = EOF;
444 if (buffer->cur == buffer->rlimit)
445 break;
446 c = *buffer->cur++;
45b966db 447 }
0d9f234d
NB
448 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
449 while (is_nvspace (c));
450
451 /* Remember the next character. */
452 buffer->read_ahead = c;
041c3194 453}
45b966db 454
93c80368
NB
455/* See if the characters of a number token are valid in a name (no
456 '.', '+' or '-'). */
457static int
458name_p (pfile, string)
459 cpp_reader *pfile;
460 const cpp_string *string;
461{
462 unsigned int i;
463
464 for (i = 0; i < string->len; i++)
465 if (!is_idchar (string->text[i]))
466 return 0;
467
468 return 1;
469}
470
0d9f234d
NB
471/* Parse an identifier, skipping embedded backslash-newlines.
472 Calculate the hash value of the token while parsing, for improved
473 performance. The hashing algorithm *must* match cpp_lookup(). */
474
475static cpp_hashnode *
476parse_identifier (pfile, c)
45b966db 477 cpp_reader *pfile;
0d9f234d 478 cppchar_t c;
45b966db 479{
93c80368 480 cpp_hashnode *result;
0d9f234d 481 cpp_buffer *buffer = pfile->buffer;
93c80368 482 unsigned char *dest, *limit;
0d9f234d 483 unsigned int r = 0, saw_dollar = 0;
93c80368
NB
484
485 dest = POOL_FRONT (&pfile->ident_pool);
486 limit = POOL_LIMIT (&pfile->ident_pool);
041c3194 487
0d9f234d 488 do
041c3194 489 {
0d9f234d 490 do
041c3194 491 {
93c80368
NB
492 /* Need room for terminating null. */
493 if (dest + 1 >= limit)
494 limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
495
496 *dest++ = c;
0d9f234d 497 r = HASHSTEP (r, c);
45b966db 498
0d9f234d
NB
499 if (c == '$')
500 saw_dollar++;
ba89d661 501
0d9f234d
NB
502 c = EOF;
503 if (buffer->cur == buffer->rlimit)
504 break;
ba89d661 505
0d9f234d
NB
506 c = *buffer->cur++;
507 }
508 while (is_idchar (c));
ba89d661 509
0d9f234d
NB
510 /* Potential escaped newline? */
511 if (c != '?' && c != '\\')
512 break;
513 c = skip_escaped_newlines (buffer, c);
041c3194 514 }
0d9f234d
NB
515 while (is_idchar (c));
516
93c80368
NB
517 /* Remember the next character. */
518 buffer->read_ahead = c;
519
0d9f234d
NB
520 /* $ is not a identifier character in the standard, but is commonly
521 accepted as an extension. Don't warn about it in skipped
522 conditional blocks. */
523 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
524 cpp_pedwarn (pfile, "'$' character(s) in identifier");
525
93c80368
NB
526 /* Identifiers are null-terminated. */
527 *dest = '\0';
528
529 /* This routine commits the memory if necessary. */
530 result = _cpp_lookup_with_hash (pfile,
531 dest - POOL_FRONT (&pfile->ident_pool), r);
532
533 /* Some identifiers require diagnostics when lexed. */
534 if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
535 {
536 /* It is allowed to poison the same identifier twice. */
537 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
538 cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);
539
540 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
28e0f040 541 replacement list of a variadic macro. */
93c80368
NB
542 if (result == pfile->spec_nodes.n__VA_ARGS__
543 && !pfile->state.va_args_ok)
28e0f040 544 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
93c80368
NB
545 }
546
547 return result;
45b966db
ZW
548}
549
0d9f234d 550/* Parse a number, skipping embedded backslash-newlines. */
45b966db 551static void
93c80368 552parse_number (pfile, number, c, leading_period)
45b966db 553 cpp_reader *pfile;
0d9f234d
NB
554 cpp_string *number;
555 cppchar_t c;
93c80368 556 int leading_period;
45b966db 557{
041c3194 558 cpp_buffer *buffer = pfile->buffer;
49fe13f6 559 cpp_pool *pool = &pfile->ident_pool;
93c80368 560 unsigned char *dest, *limit;
45b966db 561
93c80368
NB
562 dest = POOL_FRONT (pool);
563 limit = POOL_LIMIT (pool);
cbcff6df 564
93c80368
NB
565 /* Place a leading period. */
566 if (leading_period)
567 {
568 if (dest >= limit)
569 limit = _cpp_next_chunk (pool, 0, &dest);
570 *dest++ = '.';
571 }
572
0d9f234d 573 do
041c3194 574 {
0d9f234d
NB
575 do
576 {
93c80368
NB
577 /* Need room for terminating null. */
578 if (dest + 1 >= limit)
579 limit = _cpp_next_chunk (pool, 0, &dest);
580 *dest++ = c;
0d9f234d 581
0d9f234d
NB
582 c = EOF;
583 if (buffer->cur == buffer->rlimit)
584 break;
45b966db 585
0d9f234d
NB
586 c = *buffer->cur++;
587 }
93c80368 588 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
45b966db 589
0d9f234d
NB
590 /* Potential escaped newline? */
591 if (c != '?' && c != '\\')
592 break;
593 c = skip_escaped_newlines (buffer, c);
45b966db 594 }
93c80368 595 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
cbcff6df 596
0d9f234d
NB
597 /* Remember the next character. */
598 buffer->read_ahead = c;
64aaf407 599
93c80368
NB
600 /* Null-terminate the number. */
601 *dest = '\0';
602
603 number->text = POOL_FRONT (pool);
604 number->len = dest - number->text;
605 POOL_COMMIT (pool, number->len + 1);
0d9f234d
NB
606}
607
608/* Subroutine of parse_string. Emits error for unterminated strings. */
609static void
93c80368 610unterminated (pfile, term)
0d9f234d 611 cpp_reader *pfile;
0d9f234d
NB
612 int term;
613{
614 cpp_error (pfile, "missing terminating %c character", term);
615
93c80368
NB
616 if (term == '\"' && pfile->mlstring_pos.line
617 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
041c3194 618 {
93c80368
NB
619 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
620 pfile->mlstring_pos.col,
0d9f234d 621 "possible start of unterminated string literal");
93c80368 622 pfile->mlstring_pos.line = 0;
041c3194 623 }
45b966db
ZW
624}
625
93c80368
NB
626/* Subroutine of parse_string. */
627static int
628unescaped_terminator_p (pfile, dest)
629 cpp_reader *pfile;
630 const unsigned char *dest;
631{
632 const unsigned char *start, *temp;
633
634 /* In #include-style directives, terminators are not escapeable. */
635 if (pfile->state.angled_headers)
636 return 1;
637
49fe13f6 638 start = POOL_FRONT (&pfile->ident_pool);
93c80368
NB
639
640 /* An odd number of consecutive backslashes represents an escaped
641 terminator. */
642 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
643 ;
644
645 return ((dest - temp) & 1) == 0;
646}
647
0d9f234d 648/* Parses a string, character constant, or angle-bracketed header file
7868b4a2
NB
649 name. Handles embedded trigraphs and escaped newlines. The stored
650 string is guaranteed NUL-terminated, but it is not guaranteed that
651 this is the first NUL since embedded NULs are preserved.
45b966db 652
7868b4a2 653 Multi-line strings are allowed, but they are deprecated. */
041c3194 654static void
0d9f234d 655parse_string (pfile, token, terminator)
45b966db 656 cpp_reader *pfile;
041c3194 657 cpp_token *token;
0d9f234d 658 cppchar_t terminator;
45b966db 659{
041c3194 660 cpp_buffer *buffer = pfile->buffer;
49fe13f6 661 cpp_pool *pool = &pfile->ident_pool;
93c80368 662 unsigned char *dest, *limit;
0d9f234d
NB
663 cppchar_t c;
664 unsigned int nulls = 0;
665
93c80368
NB
666 dest = POOL_FRONT (pool);
667 limit = POOL_LIMIT (pool);
668
0d9f234d 669 for (;;)
45b966db 670 {
0d9f234d 671 if (buffer->cur == buffer->rlimit)
7868b4a2
NB
672 c = EOF;
673 else
674 c = *buffer->cur++;
675
676 have_char:
677 /* We need space for the terminating NUL. */
678 if (dest >= limit)
679 limit = _cpp_next_chunk (pool, 0, &dest);
680
681 if (c == EOF)
0d9f234d 682 {
93c80368 683 unterminated (pfile, terminator);
0d9f234d
NB
684 break;
685 }
0d9f234d 686
0d9f234d
NB
687 /* Handle trigraphs, escaped newlines etc. */
688 if (c == '?' || c == '\\')
689 c = skip_escaped_newlines (buffer, c);
45b966db 690
93c80368 691 if (c == terminator && unescaped_terminator_p (pfile, dest))
45b966db 692 {
93c80368
NB
693 c = EOF;
694 break;
0d9f234d
NB
695 }
696 else if (is_vspace (c))
697 {
698 /* In assembly language, silently terminate string and
699 character literals at end of line. This is a kludge
700 around not knowing where comments are. */
bdb05a7b 701 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
0d9f234d 702 break;
45b966db 703
0d9f234d
NB
704 /* Character constants and header names may not extend over
705 multiple lines. In Standard C, neither may strings.
706 Unfortunately, we accept multiline strings as an
16eb2788
NB
707 extension, except in #include family directives. */
708 if (terminator != '"' || pfile->state.angled_headers)
45b966db 709 {
93c80368 710 unterminated (pfile, terminator);
0d9f234d 711 break;
45b966db 712 }
45b966db 713
795a25f4 714 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
93c80368 715 if (pfile->mlstring_pos.line == 0)
c691145a 716 pfile->mlstring_pos = pfile->lexer_pos;
0d9f234d 717
7868b4a2
NB
718 c = handle_newline (buffer, c);
719 *dest++ = '\n';
720 goto have_char;
0d9f234d
NB
721 }
722 else if (c == '\0')
723 {
724 if (nulls++ == 0)
725 cpp_warning (pfile, "null character(s) preserved in literal");
45b966db 726 }
45b966db 727
93c80368 728 *dest++ = c;
45b966db
ZW
729 }
730
93c80368 731 /* Remember the next character. */
0d9f234d 732 buffer->read_ahead = c;
7868b4a2 733 *dest = '\0';
45b966db 734
93c80368
NB
735 token->val.str.text = POOL_FRONT (pool);
736 token->val.str.len = dest - token->val.str.text;
7868b4a2 737 POOL_COMMIT (pool, token->val.str.len + 1);
0d9f234d 738}
041c3194 739
93c80368 740/* The stored comment includes the comment start and any terminator. */
9e62c811 741static void
0d9f234d
NB
742save_comment (pfile, token, from)
743 cpp_reader *pfile;
041c3194
ZW
744 cpp_token *token;
745 const unsigned char *from;
9e62c811 746{
041c3194 747 unsigned char *buffer;
0d9f234d 748 unsigned int len;
0d9f234d 749
1c6d33ef 750 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
3542203b
NB
751 /* C++ comments probably (not definitely) have moved past a new
752 line, which we don't want to save in the comment. */
753 if (pfile->buffer->read_ahead != EOF)
754 len--;
49fe13f6 755 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
041c3194 756
041c3194 757 token->type = CPP_COMMENT;
bfb9dc7f 758 token->val.str.len = len;
0d9f234d 759 token->val.str.text = buffer;
45b966db 760
1c6d33ef
NB
761 buffer[0] = '/';
762 memcpy (buffer + 1, from, len - 1);
0d9f234d 763}
45b966db 764
cbcff6df
NB
765/* Subroutine of lex_token to handle '%'. A little tricky, since we
766 want to avoid stepping back when lexing %:%X. */
0d9f234d 767static void
cbcff6df 768lex_percent (buffer, result)
0d9f234d
NB
769 cpp_buffer *buffer;
770 cpp_token *result;
0d9f234d 771{
cbcff6df
NB
772 cppchar_t c;
773
774 result->type = CPP_MOD;
775 /* Parsing %:%X could leave an extra character. */
776 if (buffer->extra_char == EOF)
777 c = get_effective_char (buffer);
778 else
779 {
780 c = buffer->read_ahead = buffer->extra_char;
781 buffer->extra_char = EOF;
782 }
783
784 if (c == '=')
785 ACCEPT_CHAR (CPP_MOD_EQ);
786 else if (CPP_OPTION (buffer->pfile, digraphs))
787 {
788 if (c == ':')
789 {
790 result->flags |= DIGRAPH;
791 ACCEPT_CHAR (CPP_HASH);
792 if (get_effective_char (buffer) == '%')
793 {
794 buffer->extra_char = get_effective_char (buffer);
795 if (buffer->extra_char == ':')
796 {
797 buffer->extra_char = EOF;
798 ACCEPT_CHAR (CPP_PASTE);
799 }
800 else
801 /* We'll catch the extra_char when we're called back. */
802 buffer->read_ahead = '%';
803 }
804 }
805 else if (c == '>')
806 {
807 result->flags |= DIGRAPH;
808 ACCEPT_CHAR (CPP_CLOSE_BRACE);
809 }
810 }
811}
812
813/* Subroutine of lex_token to handle '.'. This is tricky, since we
814 want to avoid stepping back when lexing '...' or '.123'. In the
815 latter case we should also set a flag for parse_number. */
816static void
817lex_dot (pfile, result)
818 cpp_reader *pfile;
819 cpp_token *result;
820{
821 cpp_buffer *buffer = pfile->buffer;
822 cppchar_t c;
823
824 /* Parsing ..X could leave an extra character. */
825 if (buffer->extra_char == EOF)
826 c = get_effective_char (buffer);
827 else
828 {
829 c = buffer->read_ahead = buffer->extra_char;
830 buffer->extra_char = EOF;
831 }
0d9f234d 832
cbcff6df
NB
833 /* All known character sets have 0...9 contiguous. */
834 if (c >= '0' && c <= '9')
835 {
836 result->type = CPP_NUMBER;
93c80368 837 parse_number (pfile, &result->val.str, c, 1);
cbcff6df 838 }
041c3194 839 else
ea4a453b 840 {
cbcff6df
NB
841 result->type = CPP_DOT;
842 if (c == '.')
843 {
844 buffer->extra_char = get_effective_char (buffer);
845 if (buffer->extra_char == '.')
846 {
847 buffer->extra_char = EOF;
848 ACCEPT_CHAR (CPP_ELLIPSIS);
849 }
850 else
851 /* We'll catch the extra_char when we're called back. */
852 buffer->read_ahead = '.';
853 }
854 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
855 ACCEPT_CHAR (CPP_DOT_STAR);
ea4a453b 856 }
45b966db
ZW
857}
858
93c80368
NB
859void
860_cpp_lex_token (pfile, result)
45b966db 861 cpp_reader *pfile;
0d9f234d 862 cpp_token *result;
45b966db 863{
0d9f234d 864 cppchar_t c;
adb84b42 865 cpp_buffer *buffer;
0d9f234d 866 const unsigned char *comment_start;
8d9e9a08 867 unsigned char bol;
9ec7291f 868
8d9e9a08
NB
869 skip:
870 bol = pfile->state.next_bol;
adb84b42
NB
871 done_directive:
872 buffer = pfile->buffer;
4c2b647d 873 pfile->state.next_bol = 0;
bd969772
NB
874 result->flags = buffer->saved_flags;
875 buffer->saved_flags = 0;
0d9f234d 876 next_char:
93c80368 877 pfile->lexer_pos.line = buffer->lineno;
0d9f234d 878 next_char2:
93c80368 879 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
041c3194 880
0d9f234d
NB
881 c = buffer->read_ahead;
882 if (c == EOF && buffer->cur < buffer->rlimit)
883 {
884 c = *buffer->cur++;
93c80368 885 pfile->lexer_pos.col++;
0d9f234d 886 }
45b966db 887
0d9f234d
NB
888 do_switch:
889 buffer->read_ahead = EOF;
890 switch (c)
45b966db 891 {
0d9f234d 892 case EOF:
800914c3
NB
893 /* Non-empty files should end in a newline. Checking "bol" too
894 prevents multiple warnings when hitting the EOF more than
895 once, like in a directive. Don't warn for command line and
896 _Pragma buffers. */
897 if (pfile->lexer_pos.col != 0 && !bol && !buffer->from_stage3)
93c80368 898 cpp_pedwarn (pfile, "no newline at end of file");
4c2b647d 899 pfile->state.next_bol = 1;
b528a07e 900 pfile->skipping = 0; /* In case missing #endif. */
0d9f234d 901 result->type = CPP_EOF;
29b10746
NB
902 /* Don't do MI optimisation. */
903 return;
45b966db 904
0d9f234d
NB
905 case ' ': case '\t': case '\f': case '\v': case '\0':
906 skip_whitespace (pfile, c);
907 result->flags |= PREV_WHITE;
908 goto next_char2;
909
910 case '\n': case '\r':
a949941c 911 if (!pfile->state.in_directive)
45b966db 912 {
93c80368 913 handle_newline (buffer, c);
a949941c 914 bol = 1;
93c80368 915 pfile->lexer_pos.output_line = buffer->lineno;
8d9e9a08
NB
916 /* This is a new line, so clear any white space flag.
917 Newlines in arguments are white space (6.10.3.10);
918 parse_arg takes care of that. */
bd969772 919 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
a949941c 920 goto next_char;
45b966db 921 }
93c80368 922
a949941c
NB
923 /* Don't let directives spill over to the next line. */
924 buffer->read_ahead = c;
4c2b647d 925 pfile->state.next_bol = 1;
93c80368 926 result->type = CPP_EOF;
28e0f040
NB
927 /* Don't break; pfile->skipping might be true. */
928 return;
46d07497 929
0d9f234d
NB
930 case '?':
931 case '\\':
932 /* These could start an escaped newline, or '?' a trigraph. Let
933 skip_escaped_newlines do all the work. */
934 {
935 unsigned int lineno = buffer->lineno;
936
937 c = skip_escaped_newlines (buffer, c);
938 if (lineno != buffer->lineno)
939 /* We had at least one escaped newline of some sort, and the
940 next character is in buffer->read_ahead. Update the
941 token's line and column. */
942 goto next_char;
943
944 /* We are either the original '?' or '\\', or a trigraph. */
945 result->type = CPP_QUERY;
946 buffer->read_ahead = EOF;
947 if (c == '\\')
12c4f523 948 goto random_char;
0d9f234d
NB
949 else if (c != '?')
950 goto do_switch;
951 }
952 break;
46d07497 953
0d9f234d
NB
954 case '0': case '1': case '2': case '3': case '4':
955 case '5': case '6': case '7': case '8': case '9':
956 result->type = CPP_NUMBER;
93c80368 957 parse_number (pfile, &result->val.str, c, 0);
0d9f234d 958 break;
46d07497 959
0d9f234d
NB
960 case '$':
961 if (!CPP_OPTION (pfile, dollars_in_ident))
962 goto random_char;
963 /* Fall through... */
964
965 case '_':
966 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
967 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
968 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
969 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
970 case 'y': case 'z':
971 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
972 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
973 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
974 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
975 case 'Y': case 'Z':
976 result->type = CPP_NAME;
977 result->val.node = parse_identifier (pfile, c);
978
979 /* 'L' may introduce wide characters or strings. */
93c80368 980 if (result->val.node == pfile->spec_nodes.n_L)
0d9f234d
NB
981 {
982 c = buffer->read_ahead; /* For make_string. */
983 if (c == '\'' || c == '"')
ba89d661 984 {
0d9f234d
NB
985 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
986 goto make_string;
ba89d661 987 }
0d9f234d
NB
988 }
989 /* Convert named operators to their proper types. */
93c80368 990 else if (result->val.node->flags & NODE_OPERATOR)
0d9f234d
NB
991 {
992 result->flags |= NAMED_OP;
93c80368 993 result->type = result->val.node->value.operator;
0d9f234d
NB
994 }
995 break;
996
997 case '\'':
998 case '"':
999 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1000 make_string:
1001 parse_string (pfile, result, c);
1002 break;
041c3194 1003
0d9f234d 1004 case '/':
1c6d33ef
NB
1005 /* A potential block or line comment. */
1006 comment_start = buffer->cur;
0d9f234d
NB
1007 result->type = CPP_DIV;
1008 c = get_effective_char (buffer);
1009 if (c == '=')
1010 ACCEPT_CHAR (CPP_DIV_EQ);
1c6d33ef
NB
1011 if (c != '/' && c != '*')
1012 break;
45b966db 1013
1c6d33ef
NB
1014 if (c == '*')
1015 {
0d9f234d 1016 if (skip_block_comment (pfile))
93c80368
NB
1017 cpp_error_with_line (pfile, pfile->lexer_pos.line,
1018 pfile->lexer_pos.col,
0d9f234d 1019 "unterminated comment");
0d9f234d 1020 }
1c6d33ef 1021 else
0d9f234d 1022 {
1c6d33ef
NB
1023 if (!CPP_OPTION (pfile, cplusplus_comments)
1024 && !CPP_IN_SYSTEM_HEADER (pfile))
1025 break;
1026
bdb05a7b
NB
1027 /* Warn about comments only if pedantically GNUC89, and not
1028 in system headers. */
1029 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 1030 && ! buffer->warned_cplusplus_comments)
041c3194 1031 {
1c6d33ef
NB
1032 cpp_pedwarn (pfile,
1033 "C++ style comments are not allowed in ISO C89");
1034 cpp_pedwarn (pfile,
1035 "(this will be reported only once per input file)");
1036 buffer->warned_cplusplus_comments = 1;
1037 }
0d9f234d 1038
a94c1199 1039 /* Skip_line_comment updates buffer->read_ahead. */
01ef6563 1040 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
93c80368
NB
1041 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1042 pfile->lexer_pos.col,
1c6d33ef
NB
1043 "multi-line comment");
1044 }
0d9f234d 1045
1c6d33ef
NB
1046 /* Skipping the comment has updated buffer->read_ahead. */
1047 if (!pfile->state.save_comments)
1048 {
1049 result->flags |= PREV_WHITE;
1050 goto next_char;
0d9f234d 1051 }
1c6d33ef
NB
1052
1053 /* Save the comment as a token in its own right. */
1054 save_comment (pfile, result, comment_start);
29b10746
NB
1055 /* Don't do MI optimisation. */
1056 return;
0d9f234d
NB
1057
1058 case '<':
1059 if (pfile->state.angled_headers)
1060 {
1061 result->type = CPP_HEADER_NAME;
1062 c = '>'; /* terminator. */
1063 goto make_string;
1064 }
45b966db 1065
0d9f234d
NB
1066 result->type = CPP_LESS;
1067 c = get_effective_char (buffer);
1068 if (c == '=')
1069 ACCEPT_CHAR (CPP_LESS_EQ);
1070 else if (c == '<')
1071 {
1072 ACCEPT_CHAR (CPP_LSHIFT);
1073 if (get_effective_char (buffer) == '=')
1074 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1075 }
1076 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1077 {
1078 ACCEPT_CHAR (CPP_MIN);
1079 if (get_effective_char (buffer) == '=')
1080 ACCEPT_CHAR (CPP_MIN_EQ);
1081 }
1082 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1083 {
1084 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1085 result->flags |= DIGRAPH;
1086 }
1087 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1088 {
1089 ACCEPT_CHAR (CPP_OPEN_BRACE);
1090 result->flags |= DIGRAPH;
1091 }
1092 break;
1093
1094 case '>':
1095 result->type = CPP_GREATER;
1096 c = get_effective_char (buffer);
1097 if (c == '=')
1098 ACCEPT_CHAR (CPP_GREATER_EQ);
1099 else if (c == '>')
1100 {
1101 ACCEPT_CHAR (CPP_RSHIFT);
1102 if (get_effective_char (buffer) == '=')
1103 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1104 }
1105 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1106 {
1107 ACCEPT_CHAR (CPP_MAX);
1108 if (get_effective_char (buffer) == '=')
1109 ACCEPT_CHAR (CPP_MAX_EQ);
1110 }
1111 break;
1112
cbcff6df
NB
1113 case '%':
1114 lex_percent (buffer, result);
93c80368
NB
1115 if (result->type == CPP_HASH)
1116 goto do_hash;
0d9f234d
NB
1117 break;
1118
cbcff6df
NB
1119 case '.':
1120 lex_dot (pfile, result);
0d9f234d 1121 break;
45b966db 1122
0d9f234d
NB
1123 case '+':
1124 result->type = CPP_PLUS;
1125 c = get_effective_char (buffer);
1126 if (c == '=')
1127 ACCEPT_CHAR (CPP_PLUS_EQ);
1128 else if (c == '+')
1129 ACCEPT_CHAR (CPP_PLUS_PLUS);
1130 break;
04e3ec78 1131
0d9f234d
NB
1132 case '-':
1133 result->type = CPP_MINUS;
1134 c = get_effective_char (buffer);
1135 if (c == '>')
1136 {
1137 ACCEPT_CHAR (CPP_DEREF);
1138 if (CPP_OPTION (pfile, cplusplus)
1139 && get_effective_char (buffer) == '*')
1140 ACCEPT_CHAR (CPP_DEREF_STAR);
1141 }
1142 else if (c == '=')
1143 ACCEPT_CHAR (CPP_MINUS_EQ);
1144 else if (c == '-')
1145 ACCEPT_CHAR (CPP_MINUS_MINUS);
1146 break;
45b966db 1147
0d9f234d
NB
1148 case '*':
1149 result->type = CPP_MULT;
1150 if (get_effective_char (buffer) == '=')
1151 ACCEPT_CHAR (CPP_MULT_EQ);
1152 break;
04e3ec78 1153
0d9f234d
NB
1154 case '=':
1155 result->type = CPP_EQ;
1156 if (get_effective_char (buffer) == '=')
1157 ACCEPT_CHAR (CPP_EQ_EQ);
1158 break;
f8f769ea 1159
0d9f234d
NB
1160 case '!':
1161 result->type = CPP_NOT;
1162 if (get_effective_char (buffer) == '=')
1163 ACCEPT_CHAR (CPP_NOT_EQ);
1164 break;
45b966db 1165
0d9f234d
NB
1166 case '&':
1167 result->type = CPP_AND;
1168 c = get_effective_char (buffer);
1169 if (c == '=')
1170 ACCEPT_CHAR (CPP_AND_EQ);
1171 else if (c == '&')
1172 ACCEPT_CHAR (CPP_AND_AND);
1173 break;
1174
1175 case '#':
adb84b42
NB
1176 c = buffer->extra_char; /* Can be set by error condition below. */
1177 if (c != EOF)
1178 {
1179 buffer->read_ahead = c;
1180 buffer->extra_char = EOF;
1181 }
1182 else
1183 c = get_effective_char (buffer);
1184
1185 if (c == '#')
93c80368 1186 {
a949941c
NB
1187 ACCEPT_CHAR (CPP_PASTE);
1188 break;
1189 }
1190
1191 result->type = CPP_HASH;
1192 do_hash:
e8408f25
NB
1193 if (!bol)
1194 break;
1195 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1196 tokens within the list of arguments that would otherwise act
1197 as preprocessing directives, the behavior is undefined.
1198
1199 This implementation will report a hard error, terminate the
1200 macro invocation, and proceed to process the directive. */
1201 if (pfile->state.parsing_args)
a949941c 1202 {
e8408f25
NB
1203 if (pfile->state.parsing_args == 2)
1204 cpp_error (pfile,
1205 "directives may not be used inside a macro argument");
1206
1207 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1208 buffer->extra_char = buffer->read_ahead;
1209 buffer->read_ahead = '#';
1210 pfile->state.next_bol = 1;
1211 result->type = CPP_EOF;
1212
1213 /* Get whitespace right - newline_in_args sets it. */
1214 if (pfile->lexer_pos.col == 1)
1215 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1216 }
1217 else
1218 {
1219 /* This is the hash introducing a directive. */
1220 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1221 goto done_directive; /* bol still 1. */
1222 /* This is in fact an assembler #. */
93c80368 1223 }
0d9f234d 1224 break;
45b966db 1225
0d9f234d
NB
1226 case '|':
1227 result->type = CPP_OR;
1228 c = get_effective_char (buffer);
1229 if (c == '=')
1230 ACCEPT_CHAR (CPP_OR_EQ);
1231 else if (c == '|')
1232 ACCEPT_CHAR (CPP_OR_OR);
1233 break;
45b966db 1234
0d9f234d
NB
1235 case '^':
1236 result->type = CPP_XOR;
1237 if (get_effective_char (buffer) == '=')
1238 ACCEPT_CHAR (CPP_XOR_EQ);
1239 break;
45b966db 1240
0d9f234d
NB
1241 case ':':
1242 result->type = CPP_COLON;
1243 c = get_effective_char (buffer);
1244 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1245 ACCEPT_CHAR (CPP_SCOPE);
1246 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1247 {
1248 result->flags |= DIGRAPH;
1249 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1250 }
1251 break;
45b966db 1252
0d9f234d
NB
1253 case '~': result->type = CPP_COMPL; break;
1254 case ',': result->type = CPP_COMMA; break;
1255 case '(': result->type = CPP_OPEN_PAREN; break;
1256 case ')': result->type = CPP_CLOSE_PAREN; break;
1257 case '[': result->type = CPP_OPEN_SQUARE; break;
1258 case ']': result->type = CPP_CLOSE_SQUARE; break;
1259 case '{': result->type = CPP_OPEN_BRACE; break;
1260 case '}': result->type = CPP_CLOSE_BRACE; break;
1261 case ';': result->type = CPP_SEMICOLON; break;
1262
cc937581
ZW
1263 /* @ is a punctuator in Objective C. */
1264 case '@': result->type = CPP_ATSIGN; break;
0d9f234d
NB
1265
1266 random_char:
1267 default:
1268 result->type = CPP_OTHER;
6c53ebff 1269 result->val.c = c;
0d9f234d
NB
1270 break;
1271 }
b528a07e 1272
8d9e9a08
NB
1273 if (pfile->skipping)
1274 goto skip;
1275
29b10746
NB
1276 /* If not in a directive, this token invalidates controlling macros. */
1277 if (!pfile->state.in_directive)
b528a07e 1278 pfile->mi_state = MI_FAILED;
0d9f234d
NB
1279}
1280
93c80368
NB
1281/* An upper bound on the number of bytes needed to spell a token,
1282 including preceding whitespace. */
1283unsigned int
1284cpp_token_len (token)
1285 const cpp_token *token;
0d9f234d 1286{
93c80368 1287 unsigned int len;
6d2c2047 1288
93c80368 1289 switch (TOKEN_SPELL (token))
041c3194 1290 {
93c80368
NB
1291 default: len = 0; break;
1292 case SPELL_STRING: len = token->val.str.len; break;
1293 case SPELL_IDENT: len = token->val.node->length; break;
041c3194 1294 }
93c80368
NB
1295 /* 1 for whitespace, 4 for comment delimeters. */
1296 return len + 5;
6d2c2047
ZW
1297}
1298
041c3194 1299/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1300 already contain the enough space to hold the token's spelling.
1301 Returns a pointer to the character after the last character
1302 written. */
93c80368
NB
1303unsigned char *
1304cpp_spell_token (pfile, token, buffer)
041c3194
ZW
1305 cpp_reader *pfile; /* Would be nice to be rid of this... */
1306 const cpp_token *token;
1307 unsigned char *buffer;
1308{
96be6998 1309 switch (TOKEN_SPELL (token))
041c3194
ZW
1310 {
1311 case SPELL_OPERATOR:
1312 {
1313 const unsigned char *spelling;
1314 unsigned char c;
d6d5f795 1315
041c3194 1316 if (token->flags & DIGRAPH)
37b8524c
JDA
1317 spelling
1318 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
92936ecf
ZW
1319 else if (token->flags & NAMED_OP)
1320 goto spell_ident;
041c3194 1321 else
96be6998 1322 spelling = TOKEN_NAME (token);
041c3194
ZW
1323
1324 while ((c = *spelling++) != '\0')
1325 *buffer++ = c;
1326 }
1327 break;
d6d5f795 1328
041c3194 1329 case SPELL_IDENT:
92936ecf 1330 spell_ident:
bfb9dc7f
ZW
1331 memcpy (buffer, token->val.node->name, token->val.node->length);
1332 buffer += token->val.node->length;
041c3194 1333 break;
d6d5f795 1334
041c3194
ZW
1335 case SPELL_STRING:
1336 {
ba89d661
ZW
1337 int left, right, tag;
1338 switch (token->type)
1339 {
1340 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1341 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
ba89d661
ZW
1342 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1343 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1344 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1345 default: left = '\0'; right = '\0'; tag = '\0'; break;
1346 }
1347 if (tag) *buffer++ = tag;
1348 if (left) *buffer++ = left;
bfb9dc7f
ZW
1349 memcpy (buffer, token->val.str.text, token->val.str.len);
1350 buffer += token->val.str.len;
ba89d661 1351 if (right) *buffer++ = right;
041c3194
ZW
1352 }
1353 break;
d6d5f795 1354
041c3194 1355 case SPELL_CHAR:
6c53ebff 1356 *buffer++ = token->val.c;
041c3194 1357 break;
d6d5f795 1358
041c3194 1359 case SPELL_NONE:
96be6998 1360 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1361 break;
1362 }
d6d5f795 1363
041c3194
ZW
1364 return buffer;
1365}
d6d5f795 1366
93c80368
NB
1367/* Returns a token as a null-terminated string. The string is
1368 temporary, and automatically freed later. Useful for diagnostics. */
1369unsigned char *
1370cpp_token_as_text (pfile, token)
c5a04734 1371 cpp_reader *pfile;
041c3194 1372 const cpp_token *token;
c5a04734 1373{
93c80368 1374 unsigned int len = cpp_token_len (token);
49fe13f6 1375 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
c5a04734 1376
93c80368
NB
1377 end = cpp_spell_token (pfile, token, start);
1378 end[0] = '\0';
c5a04734 1379
93c80368
NB
1380 return start;
1381}
c5a04734 1382
93c80368
NB
1383/* Used by C front ends. Should really move to using cpp_token_as_text. */
1384const char *
1385cpp_type2name (type)
1386 enum cpp_ttype type;
1387{
1388 return (const char *) token_spellings[type].name;
1389}
c5a04734 1390
93c80368
NB
1391/* Writes the spelling of token to FP. Separate from cpp_spell_token
1392 for efficiency - to avoid double-buffering. Also, outputs a space
1393 if PREV_WHITE is flagged. */
1394void
1395cpp_output_token (token, fp)
1396 const cpp_token *token;
1397 FILE *fp;
1398{
1399 if (token->flags & PREV_WHITE)
1400 putc (' ', fp);
d8090680 1401
93c80368 1402 switch (TOKEN_SPELL (token))
c5a04734 1403 {
93c80368
NB
1404 case SPELL_OPERATOR:
1405 {
1406 const unsigned char *spelling;
c5a04734 1407
93c80368 1408 if (token->flags & DIGRAPH)
37b8524c
JDA
1409 spelling
1410 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
93c80368
NB
1411 else if (token->flags & NAMED_OP)
1412 goto spell_ident;
1413 else
1414 spelling = TOKEN_NAME (token);
041c3194 1415
93c80368
NB
1416 ufputs (spelling, fp);
1417 }
1418 break;
041c3194 1419
93c80368
NB
1420 spell_ident:
1421 case SPELL_IDENT:
1422 ufputs (token->val.node->name, fp);
1423 break;
041c3194 1424
93c80368
NB
1425 case SPELL_STRING:
1426 {
1427 int left, right, tag;
1428 switch (token->type)
1429 {
1430 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1431 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
93c80368
NB
1432 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1433 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1434 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1435 default: left = '\0'; right = '\0'; tag = '\0'; break;
1436 }
1437 if (tag) putc (tag, fp);
1438 if (left) putc (left, fp);
1439 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1440 if (right) putc (right, fp);
1441 }
1442 break;
c5a04734 1443
93c80368 1444 case SPELL_CHAR:
6c53ebff 1445 putc (token->val.c, fp);
93c80368 1446 break;
c5a04734 1447
93c80368
NB
1448 case SPELL_NONE:
1449 /* An error, most probably. */
1450 break;
041c3194 1451 }
c5a04734
ZW
1452}
1453
93c80368
NB
1454/* Compare two tokens. */
1455int
1456_cpp_equiv_tokens (a, b)
1457 const cpp_token *a, *b;
c5a04734 1458{
93c80368
NB
1459 if (a->type == b->type && a->flags == b->flags)
1460 switch (TOKEN_SPELL (a))
1461 {
1462 default: /* Keep compiler happy. */
1463 case SPELL_OPERATOR:
1464 return 1;
1465 case SPELL_CHAR:
6c53ebff 1466 return a->val.c == b->val.c; /* Character. */
93c80368 1467 case SPELL_NONE:
56051c0a 1468 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
93c80368
NB
1469 case SPELL_IDENT:
1470 return a->val.node == b->val.node;
1471 case SPELL_STRING:
1472 return (a->val.str.len == b->val.str.len
1473 && !memcmp (a->val.str.text, b->val.str.text,
1474 a->val.str.len));
1475 }
c5a04734 1476
041c3194
ZW
1477 return 0;
1478}
1479
041c3194
ZW
1480/* Determine whether two tokens can be pasted together, and if so,
1481 what the resulting token is. Returns CPP_EOF if the tokens cannot
1482 be pasted, or the appropriate type for the merged token if they
1483 can. */
7de4d004 1484enum cpp_ttype
93c80368 1485cpp_can_paste (pfile, token1, token2, digraph)
041c3194
ZW
1486 cpp_reader * pfile;
1487 const cpp_token *token1, *token2;
1488 int* digraph;
c5a04734 1489{
041c3194
ZW
1490 enum cpp_ttype a = token1->type, b = token2->type;
1491 int cxx = CPP_OPTION (pfile, cplusplus);
c5a04734 1492
92936ecf
ZW
1493 /* Treat named operators as if they were ordinary NAMEs. */
1494 if (token1->flags & NAMED_OP)
1495 a = CPP_NAME;
1496 if (token2->flags & NAMED_OP)
1497 b = CPP_NAME;
1498
37b8524c
JDA
1499 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1500 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
c5a04734 1501
041c3194 1502 switch (a)
c5a04734 1503 {
041c3194
ZW
1504 case CPP_GREATER:
1505 if (b == a) return CPP_RSHIFT;
1506 if (b == CPP_QUERY && cxx) return CPP_MAX;
1507 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1508 break;
1509 case CPP_LESS:
1510 if (b == a) return CPP_LSHIFT;
1511 if (b == CPP_QUERY && cxx) return CPP_MIN;
1512 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
9b55f29a
NB
1513 if (CPP_OPTION (pfile, digraphs))
1514 {
1515 if (b == CPP_COLON)
1516 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1517 if (b == CPP_MOD)
1518 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1519 }
041c3194 1520 break;
c5a04734 1521
041c3194
ZW
1522 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1523 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1524 case CPP_OR: if (b == a) return CPP_OR_OR; break;
c5a04734 1525
041c3194
ZW
1526 case CPP_MINUS:
1527 if (b == a) return CPP_MINUS_MINUS;
1528 if (b == CPP_GREATER) return CPP_DEREF;
1529 break;
1530 case CPP_COLON:
1531 if (b == a && cxx) return CPP_SCOPE;
9b55f29a 1532 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
041c3194
ZW
1533 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1534 break;
1535
1536 case CPP_MOD:
9b55f29a
NB
1537 if (CPP_OPTION (pfile, digraphs))
1538 {
1539 if (b == CPP_GREATER)
1540 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1541 if (b == CPP_COLON)
1542 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1543 }
041c3194
ZW
1544 break;
1545 case CPP_DEREF:
1546 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1547 break;
1548 case CPP_DOT:
1549 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1550 if (b == CPP_NUMBER) return CPP_NUMBER;
1551 break;
1552
1553 case CPP_HASH:
1554 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1555 /* %:%: digraph */
1556 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1557 break;
1558
1559 case CPP_NAME:
1560 if (b == CPP_NAME) return CPP_NAME;
1561 if (b == CPP_NUMBER
93c80368 1562 && name_p (pfile, &token2->val.str)) return CPP_NAME;
041c3194 1563 if (b == CPP_CHAR
93c80368 1564 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
041c3194 1565 if (b == CPP_STRING
93c80368 1566 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
041c3194
ZW
1567 break;
1568
1569 case CPP_NUMBER:
1570 if (b == CPP_NUMBER) return CPP_NUMBER;
1571 if (b == CPP_NAME) return CPP_NUMBER;
1572 if (b == CPP_DOT) return CPP_NUMBER;
1573 /* Numbers cannot have length zero, so this is safe. */
1574 if ((b == CPP_PLUS || b == CPP_MINUS)
bfb9dc7f 1575 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
041c3194
ZW
1576 return CPP_NUMBER;
1577 break;
1578
1579 default:
1580 break;
c5a04734
ZW
1581 }
1582
041c3194
ZW
1583 return CPP_EOF;
1584}
1585
93c80368
NB
1586/* Returns nonzero if a space should be inserted to avoid an
1587 accidental token paste for output. For simplicity, it is
1588 conservative, and occasionally advises a space where one is not
1589 needed, e.g. "." and ".2". */
041c3194 1590
93c80368
NB
1591int
1592cpp_avoid_paste (pfile, token1, token2)
c5a04734 1593 cpp_reader *pfile;
93c80368 1594 const cpp_token *token1, *token2;
c5a04734 1595{
93c80368
NB
1596 enum cpp_ttype a = token1->type, b = token2->type;
1597 cppchar_t c;
c5a04734 1598
93c80368
NB
1599 if (token1->flags & NAMED_OP)
1600 a = CPP_NAME;
1601 if (token2->flags & NAMED_OP)
1602 b = CPP_NAME;
c5a04734 1603
93c80368
NB
1604 c = EOF;
1605 if (token2->flags & DIGRAPH)
37b8524c 1606 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
1607 else if (token_spellings[b].category == SPELL_OPERATOR)
1608 c = token_spellings[b].name[0];
c5a04734 1609
93c80368 1610 /* Quickly get everything that can paste with an '='. */
37b8524c 1611 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 1612 return 1;
c5a04734 1613
93c80368 1614 switch (a)
c5a04734 1615 {
93c80368
NB
1616 case CPP_GREATER: return c == '>' || c == '?';
1617 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1618 case CPP_PLUS: return c == '+';
1619 case CPP_MINUS: return c == '-' || c == '>';
1620 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1621 case CPP_MOD: return c == ':' || c == '>';
1622 case CPP_AND: return c == '&';
1623 case CPP_OR: return c == '|';
1624 case CPP_COLON: return c == ':' || c == '>';
1625 case CPP_DEREF: return c == '*';
26ec42ee 1626 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
1627 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1628 case CPP_NAME: return ((b == CPP_NUMBER
1629 && name_p (pfile, &token2->val.str))
1630 || b == CPP_NAME
1631 || b == CPP_CHAR || b == CPP_STRING); /* L */
1632 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1633 || c == '.' || c == '+' || c == '-');
1634 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
6c53ebff 1635 && token1->val.c == '@'
93c80368
NB
1636 && (b == CPP_NAME || b == CPP_STRING));
1637 default: break;
c5a04734 1638 }
c5a04734 1639
417f3e3a 1640 return 0;
c5a04734
ZW
1641}
1642
93c80368
NB
1643/* Output all the remaining tokens on the current line, and a newline
1644 character, to FP. Leading whitespace is removed. */
c5a04734 1645void
93c80368 1646cpp_output_line (pfile, fp)
c5a04734 1647 cpp_reader *pfile;
93c80368 1648 FILE *fp;
c5a04734 1649{
93c80368 1650 cpp_token token;
96be6998 1651
7f2f1a66 1652 cpp_get_token (pfile, &token);
93c80368
NB
1653 token.flags &= ~PREV_WHITE;
1654 while (token.type != CPP_EOF)
96be6998 1655 {
93c80368 1656 cpp_output_token (&token, fp);
7f2f1a66 1657 cpp_get_token (pfile, &token);
96be6998
ZW
1658 }
1659
93c80368 1660 putc ('\n', fp);
041c3194 1661}
c5a04734 1662
c8a96070
NB
/* Returns the numeric value, 0 to 15, of the hexadecimal digit C.
   Both upper and lower case letters are accepted.  Aborts if C is
   not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  unsigned int value;

  if (c >= '0' && c <= '9')
    value = c - '0';
  else if (c >= 'A' && c <= 'F')
    value = 10 + (c - 'A');
  else if (c >= 'a' && c <= 'f')
    value = 10 + (c - 'a');
  else
    abort ();

  return value;
}
1676
1677/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence (C++ and C99).
1678
1679 [lex.charset]: The character designated by the universal character
1680 name \UNNNNNNNN is that character whose character short name in
1681 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1682 universal character name \uNNNN is that character whose character
1683 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1684 for a universal character name is less than 0x20 or in the range
1685 0x7F-0x9F (inclusive), or if the universal character name
1686 designates a character in the basic source character set, then the
1687 program is ill-formed.
1688
1689 We assume that wchar_t is Unicode, so we don't need to do any
1690 mapping. Is this ever wrong? */
1691
1692static unsigned int
1693read_ucs (pfile, pstr, limit, length)
1694 cpp_reader *pfile;
1695 const unsigned char **pstr;
1696 const unsigned char *limit;
1697 unsigned int length;
1698{
1699 const unsigned char *p = *pstr;
1700 unsigned int c, code = 0;
1701
1702 for (; length; --length)
1703 {
1704 if (p >= limit)
1705 {
1706 cpp_error (pfile, "incomplete universal-character-name");
1707 break;
1708 }
1709
1710 c = *p;
1711 if (ISXDIGIT (c))
1712 {
1713 code = (code << 4) + hex_digit_value (c);
1714 p++;
1715 }
1716 else
1717 {
1718 cpp_error (pfile,
1719 "non-hex digit '%c' in universal-character-name", c);
1720 break;
1721 }
1722
1723 }
1724
1725#ifdef TARGET_EBCDIC
1726 cpp_error (pfile, "universal-character-name on EBCDIC target");
1727 code = 0x3f; /* EBCDIC invalid character */
1728#else
1729 if (code > 0x9f && !(code & 0x80000000))
1730 ; /* True extended character, OK. */
1731 else if (code >= 0x20 && code < 0x7f)
1732 {
1733 /* ASCII printable character. The C character set consists of all of
1734 these except $, @ and `. We use hex escapes so that this also
1735 works with EBCDIC hosts. */
1736 if (code != 0x24 && code != 0x40 && code != 0x60)
1737 cpp_error (pfile, "universal-character-name used for '%c'", code);
1738 }
1739 else
1740 cpp_error (pfile, "invalid universal-character-name");
1741#endif
1742
1743 *pstr = p;
1744 return code;
1745}
1746
1747/* Interpret an escape sequence, and return its value. PSTR points to
1748 the input pointer, which is just after the backslash. LIMIT is how
1749 much text we have. MASK is the precision for the target type (char
1750 or wchar_t). TRADITIONAL, if true, does not interpret escapes that
1751 did not exist in traditional C. */
1752
1753static unsigned int
1754parse_escape (pfile, pstr, limit, mask, traditional)
1755 cpp_reader *pfile;
1756 const unsigned char **pstr;
1757 const unsigned char *limit;
1758 HOST_WIDE_INT mask;
1759 int traditional;
1760{
1761 int unknown = 0;
1762 const unsigned char *str = *pstr;
1763 unsigned int c = *str++;
1764
1765 switch (c)
1766 {
1767 case '\\': case '\'': case '"': case '?': break;
1768 case 'b': c = TARGET_BS; break;
1769 case 'f': c = TARGET_FF; break;
1770 case 'n': c = TARGET_NEWLINE; break;
1771 case 'r': c = TARGET_CR; break;
1772 case 't': c = TARGET_TAB; break;
1773 case 'v': c = TARGET_VT; break;
1774
1775 case '(': case '{': case '[': case '%':
1776 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1777 '\%' is used to prevent SCCS from getting confused. */
1778 unknown = CPP_PEDANTIC (pfile);
1779 break;
1780
1781 case 'a':
1782 if (CPP_WTRADITIONAL (pfile))
1783 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1784 if (!traditional)
1785 c = TARGET_BELL;
1786 break;
1787
1788 case 'e': case 'E':
1789 if (CPP_PEDANTIC (pfile))
1790 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1791 c = TARGET_ESC;
1792 break;
1793
1794 /* Warnings and support checks handled by read_ucs(). */
1795 case 'u': case 'U':
1796 if (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))
1797 {
1798 if (CPP_WTRADITIONAL (pfile))
1799 cpp_warning (pfile,
1800 "the meaning of '\\%c' varies with -traditional", c);
1801 c = read_ucs (pfile, &str, limit, c == 'u' ? 4 : 8);
1802 }
1803 else
1804 unknown = 1;
1805 break;
1806
1807 case 'x':
1808 if (CPP_WTRADITIONAL (pfile))
1809 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1810
1811 if (!traditional)
1812 {
1813 unsigned int i = 0, overflow = 0;
1814 int digits_found = 0;
1815
1816 while (str < limit)
1817 {
1818 c = *str;
1819 if (! ISXDIGIT (c))
1820 break;
1821 str++;
1822 overflow |= i ^ (i << 4 >> 4);
1823 i = (i << 4) + hex_digit_value (c);
1824 digits_found = 1;
1825 }
1826
1827 if (!digits_found)
1828 cpp_error (pfile, "\\x used with no following hex digits");
1829
1830 if (overflow | (i != (i & mask)))
1831 {
1832 cpp_pedwarn (pfile, "hex escape sequence out of range");
1833 i &= mask;
1834 }
1835 c = i;
1836 }
1837 break;
1838
1839 case '0': case '1': case '2': case '3':
1840 case '4': case '5': case '6': case '7':
1841 {
1842 unsigned int i = c - '0';
1843 int count = 0;
1844
1845 while (str < limit && ++count < 3)
1846 {
1847 c = *str;
1848 if (c < '0' || c > '7')
1849 break;
1850 str++;
1851 i = (i << 3) + c - '0';
1852 }
1853
1854 if (i != (i & mask))
1855 {
1856 cpp_pedwarn (pfile, "octal escape sequence out of range");
1857 i &= mask;
1858 }
1859 c = i;
1860 }
1861 break;
1862
1863 default:
1864 unknown = 1;
1865 break;
1866 }
1867
1868 if (unknown)
1869 {
1870 if (ISGRAPH (c))
1871 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1872 else
1873 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1874 }
1875
1876 *pstr = str;
1877 return c;
1878}
1879
1880#ifndef MAX_CHAR_TYPE_SIZE
1881#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1882#endif
1883
1884#ifndef MAX_WCHAR_TYPE_SIZE
1885#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1886#endif
1887
1888/* Interpret a (possibly wide) character constant in TOKEN.
1889 WARN_MULTI warns about multi-character charconsts, if not
1890 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1891 that did not exist in traditional C. PCHARS_SEEN points to a
1892 variable that is filled in with the number of characters seen. */
1893HOST_WIDE_INT
1894cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1895 cpp_reader *pfile;
1896 const cpp_token *token;
1897 int warn_multi;
1898 int traditional;
1899 unsigned int *pchars_seen;
1900{
1901 const unsigned char *str = token->val.str.text;
1902 const unsigned char *limit = str + token->val.str.len;
1903 unsigned int chars_seen = 0;
1904 unsigned int width, max_chars, c;
1905 HOST_WIDE_INT result = 0, mask;
1906
1907#ifdef MULTIBYTE_CHARS
1908 (void) local_mbtowc (NULL, NULL, 0);
1909#endif
1910
1911 /* Width in bits. */
1912 if (token->type == CPP_CHAR)
1913 width = MAX_CHAR_TYPE_SIZE;
1914 else
1915 width = MAX_WCHAR_TYPE_SIZE;
1916
1917 if (width < HOST_BITS_PER_WIDE_INT)
1918 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1919 else
1920 mask = ~0;
1921 max_chars = HOST_BITS_PER_WIDE_INT / width;
1922
1923 while (str < limit)
1924 {
1925#ifdef MULTIBYTE_CHARS
1926 wchar_t wc;
1927 int char_len;
1928
1929 char_len = local_mbtowc (&wc, str, limit - str);
1930 if (char_len == -1)
1931 {
1932 cpp_warning (pfile, "ignoring invalid multibyte character");
1933 c = *str++;
1934 }
1935 else
1936 {
1937 str += char_len;
1938 c = wc;
1939 }
1940#else
1941 c = *str++;
1942#endif
1943
1944 if (c == '\\')
1945 {
1946 c = parse_escape (pfile, &str, limit, mask, traditional);
1947 if (width < HOST_BITS_PER_WIDE_INT && c > mask)
1948 cpp_pedwarn (pfile, "escape sequence out of range for character");
1949 }
1950
1951#ifdef MAP_CHARACTER
1952 if (ISPRINT (c))
1953 c = MAP_CHARACTER (c);
1954#endif
1955
1956 /* Merge character into result; ignore excess chars. */
1957 if (++chars_seen <= max_chars)
1958 {
1959 if (width < HOST_BITS_PER_WIDE_INT)
1960 result = (result << width) | (c & mask);
1961 else
1962 result = c;
1963 }
1964 }
1965
1966 if (chars_seen == 0)
1967 cpp_error (pfile, "empty character constant");
1968 else if (chars_seen > max_chars)
1969 {
1970 chars_seen = max_chars;
1971 cpp_error (pfile, "character constant too long");
1972 }
1973 else if (chars_seen > 1 && !traditional && warn_multi)
1974 cpp_warning (pfile, "multi-character character constant");
1975
1976 /* If char type is signed, sign-extend the constant. The
1977 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
1978 if (token->type == CPP_CHAR && chars_seen)
1979 {
1980 unsigned int nbits = chars_seen * width;
1981 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
1982
1983 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
1984 || ((result >> (nbits - 1)) & 1) == 0)
1985 result &= mask;
1986 else
1987 result |= ~mask;
1988 }
1989
1990 *pchars_seen = chars_seen;
1991 return result;
1992}
1993
/* Memory pools.  */

/* Probe type that exists only so that an alignment can be measured:
   the union U follows a single char, so its offset within the struct
   is the alignment the compiler requires for a union holding a
   double and a pointer.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* Alignment, in bytes, derived from the probe type above.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
417f3e3a 2007
93c80368
NB
2008static int
2009chunk_suitable (pool, chunk, size)
2010 cpp_pool *pool;
2011 cpp_chunk *chunk;
2012 unsigned int size;
2013{
2014 /* Being at least twice SIZE means we can use memcpy in
2015 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2016 anyway. */
2017 return (chunk && pool->locked != chunk
2018 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
041c3194 2019}
c5a04734 2020
93c80368
NB
2021/* Returns the end of the new pool. PTR points to a char in the old
2022 pool, and is updated to point to the same char in the new pool. */
2023unsigned char *
2024_cpp_next_chunk (pool, len, ptr)
2025 cpp_pool *pool;
2026 unsigned int len;
2027 unsigned char **ptr;
041c3194 2028{
93c80368 2029 cpp_chunk *chunk = pool->cur->next;
c5a04734 2030
93c80368
NB
2031 /* LEN is the minimum size we want in the new pool. */
2032 len += POOL_ROOM (pool);
2033 if (! chunk_suitable (pool, chunk, len))
041c3194 2034 {
93c80368 2035 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
c5a04734 2036
93c80368
NB
2037 chunk->next = pool->cur->next;
2038 pool->cur->next = chunk;
c5a04734
ZW
2039 }
2040
93c80368
NB
2041 /* Update the pointer before changing chunk's front. */
2042 if (ptr)
2043 *ptr += chunk->base - POOL_FRONT (pool);
041c3194 2044
93c80368
NB
2045 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2046 chunk->front = chunk->base;
041c3194 2047
93c80368
NB
2048 pool->cur = chunk;
2049 return POOL_LIMIT (pool);
c5a04734
ZW
2050}
2051
93c80368
NB
2052static cpp_chunk *
2053new_chunk (size)
2054 unsigned int size;
041c3194 2055{
93c80368
NB
2056 unsigned char *base;
2057 cpp_chunk *result;
3fef5b2b 2058
269592a8 2059 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
93c80368
NB
2060 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2061 /* Put the chunk descriptor at the end. Then chunk overruns will
2062 cause obvious chaos. */
2063 result = (cpp_chunk *) (base + size);
2064 result->base = base;
2065 result->front = base;
2066 result->limit = base + size;
2067 result->next = 0;
417f3e3a 2068
93c80368 2069 return result;
041c3194
ZW
2070}
2071
93c80368
NB
2072void
2073_cpp_init_pool (pool, size, align, temp)
2074 cpp_pool *pool;
2075 unsigned int size, align, temp;
2076{
2077 if (align == 0)
2078 align = DEFAULT_ALIGNMENT;
2079 if (align & (align - 1))
2080 abort ();
2081 pool->align = align;
2082 pool->cur = new_chunk (size);
2083 pool->locked = 0;
2084 pool->locks = 0;
2085 if (temp)
2086 pool->cur->next = pool->cur;
041c3194
ZW
2087}
2088
93c80368
NB
2089void
2090_cpp_lock_pool (pool)
2091 cpp_pool *pool;
041c3194 2092{
93c80368
NB
2093 if (pool->locks++ == 0)
2094 pool->locked = pool->cur;
041c3194
ZW
2095}
2096
93c80368
NB
2097void
2098_cpp_unlock_pool (pool)
2099 cpp_pool *pool;
041c3194 2100{
93c80368
NB
2101 if (--pool->locks == 0)
2102 pool->locked = 0;
041c3194
ZW
2103}
2104
93c80368
NB
2105void
2106_cpp_free_pool (pool)
2107 cpp_pool *pool;
3fef5b2b 2108{
93c80368 2109 cpp_chunk *chunk = pool->cur, *next;
3fef5b2b 2110
93c80368 2111 do
3fef5b2b 2112 {
93c80368
NB
2113 next = chunk->next;
2114 free (chunk->base);
2115 chunk = next;
3fef5b2b 2116 }
93c80368 2117 while (chunk && chunk != pool->cur);
041c3194 2118}
041c3194 2119
93c80368
NB
2120/* Reserve LEN bytes from a memory pool. */
2121unsigned char *
2122_cpp_pool_reserve (pool, len)
2123 cpp_pool *pool;
2124 unsigned int len;
041c3194 2125{
269592a8 2126 len = POOL_ALIGN (len, pool->align);
93c80368
NB
2127 if (len > (unsigned int) POOL_ROOM (pool))
2128 _cpp_next_chunk (pool, len, 0);
041c3194 2129
93c80368 2130 return POOL_FRONT (pool);
c5a04734
ZW
2131}
2132
93c80368
NB
2133/* Allocate LEN bytes from a memory pool. */
2134unsigned char *
2135_cpp_pool_alloc (pool, len)
2136 cpp_pool *pool;
2137 unsigned int len;
041c3194 2138{
93c80368 2139 unsigned char *result = _cpp_pool_reserve (pool, len);
417f3e3a 2140
93c80368
NB
2141 POOL_COMMIT (pool, len);
2142 return result;
041c3194 2143}
This page took 0.612483 seconds and 5 git commands to generate.